Compare commits

...

7 Commits

Author SHA1 Message Date
Conrad Irwin
7ffa87e566 Make ssh remoting work 2024-08-23 15:59:29 -06:00
Conrad Irwin
f7a532add8 WIP 2024-08-23 14:57:55 -06:00
Conrad Irwin
3a67fb68da TEMP 2024-08-23 13:46:40 -06:00
Conrad Irwin
dd7105ff38 Revamp... 2024-08-23 13:32:24 -06:00
Conrad Irwin
15dfb47b17 SSH remoting project search 2024-08-23 11:18:46 -06:00
Conrad Irwin
0f57932945 Move seearch code from project.rs -> search.rs 2024-08-22 22:33:30 -06:00
Conrad Irwin
42c96212c4 TEMP 2024-08-22 21:39:28 -06:00
12 changed files with 733 additions and 543 deletions

View File

@@ -28,8 +28,8 @@ use live_kit_client::MacOSDisplay;
use lsp::LanguageServerId;
use parking_lot::Mutex;
use project::{
search::SearchQuery, DiagnosticSummary, FormatTrigger, HoverBlockKind, Project, ProjectPath,
SearchResult,
search::{SearchQuery, SearchResult},
DiagnosticSummary, FormatTrigger, HoverBlockKind, Project, ProjectPath,
};
use rand::prelude::*;
use serde_json::json;

View File

@@ -15,7 +15,8 @@ use language::{
use lsp::FakeLanguageServer;
use pretty_assertions::assert_eq;
use project::{
search::SearchQuery, Project, ProjectPath, SearchResult, DEFAULT_COMPLETION_CONTEXT,
search::{SearchQuery, SearchResult},
Project, ProjectPath, DEFAULT_COMPLETION_CONTEXT,
};
use rand::{
distributions::{Alphanumeric, DistString},

View File

@@ -1,6 +1,7 @@
use crate::{
search::{SearchMatchCandidate, SearchQuery},
worktree_store::{WorktreeStore, WorktreeStoreEvent},
NoRepositoryError, ProjectPath,
Item, NoRepositoryError, ProjectPath,
};
use anyhow::{anyhow, Context as _, Result};
use collections::{hash_map, HashMap};
@@ -778,6 +779,61 @@ impl BufferStore {
.retain(|_, buffer| !matches!(buffer, OpenBuffer::Operations(_)));
}
/// Returns the open buffers that are candidates for `query`, filtered by filename only.
///
/// Does *not* check the buffer content, the caller must do that.
///
/// * Buffers whose worktree entry is marked ignored are skipped unless
///   `query.include_ignored()` is set.
/// * Buffers without a file are always returned, with `path: None`.
/// * When more than one worktree is visible, the query is matched against the file's
///   full path; otherwise it is matched against `file.path()`.
///
/// NOTE(review): the method name is spelled `seach`; it is kept as-is because callers
/// refer to it by this name.
pub(crate) fn find_seach_candidates(
    &self,
    query: &SearchQuery,
    cx: &ModelContext<Self>,
) -> Vec<SearchMatchCandidate> {
    // Only the number of visible worktrees matters here, so count them rather than
    // collecting them into a temporary `Vec`.
    let include_root = self.worktree_store.read(cx).visible_worktrees(cx).count() > 1;

    self.buffers()
        .filter_map(|buffer| {
            let handle = buffer.clone();
            buffer.read_with(cx, |buffer, cx| {
                let worktree_store = self.worktree_store.read(cx);
                let entry_id = buffer.entry_id(cx);
                let is_ignored = entry_id
                    .and_then(|entry_id| worktree_store.entry_for_id(entry_id, cx))
                    .map_or(false, |entry| entry.is_ignored);
                if is_ignored && !query.include_ignored() {
                    return None;
                }

                match buffer.file() {
                    // No file to test the path filters against: always a candidate.
                    None => Some(SearchMatchCandidate::OpenBuffer {
                        buffer: handle,
                        entry_id,
                        path: None,
                    }),
                    Some(file) => {
                        let matched_path = if include_root {
                            query.file_matches(Some(&file.full_path(cx)))
                        } else {
                            query.file_matches(Some(file.path()))
                        };
                        matched_path.then(|| SearchMatchCandidate::OpenBuffer {
                            buffer: handle,
                            entry_id,
                            path: Some(file.path().clone()),
                        })
                    }
                }
            })
        })
        .collect()
}
fn on_buffer_event(
&mut self,
buffer: Model<Buffer>,

View File

@@ -24,7 +24,7 @@ use client::{
TypedEnvelope, UserStore,
};
use clock::ReplicaId;
use collections::{btree_map, BTreeMap, BTreeSet, HashMap, HashSet, VecDeque};
use collections::{btree_map, BTreeMap, BTreeSet, HashMap, HashSet};
use debounced_delay::DebouncedDelay;
use futures::{
channel::mpsc::{self, UnboundedReceiver},
@@ -37,8 +37,8 @@ use futures::{
use git::{blame::Blame, repository::GitRepository};
use globset::{Glob, GlobSet, GlobSetBuilder};
use gpui::{
AnyModel, AppContext, AsyncAppContext, BackgroundExecutor, BorrowAppContext, Context, Entity,
EventEmitter, Model, ModelContext, PromptLevel, SharedString, Task, WeakModel, WindowContext,
AnyModel, AppContext, AsyncAppContext, BorrowAppContext, Context, Entity, EventEmitter, Model,
ModelContext, PromptLevel, SharedString, Task, WeakModel, WindowContext,
};
use http_client::HttpClient;
use itertools::Itertools;
@@ -78,22 +78,19 @@ use project_settings::{DirenvSettings, LspSettings, ProjectSettings};
use rand::prelude::*;
use remote::SshSession;
use rpc::{proto::AddWorktree, ErrorCode};
use search::SearchQuery;
use search::{sort_search_matches, SearchMatchCandidate, SearchQuery, SearchResult};
use search_history::SearchHistory;
use serde::Serialize;
use settings::{watch_config_file, Settings, SettingsLocation, SettingsStore};
use sha2::{Digest, Sha256};
use similar::{ChangeTag, TextDiff};
use smol::{
channel::{Receiver, Sender},
lock::Semaphore,
};
use smol::channel::{Receiver, Sender};
use snippet::Snippet;
use snippet_provider::SnippetProvider;
use std::{
borrow::Cow,
cell::RefCell,
cmp::{self, Ordering},
cmp::Ordering,
convert::TryInto,
env,
ffi::OsStr,
@@ -680,29 +677,6 @@ impl DirectoryLister {
}
}
/// A candidate for a project search: either a buffer that is already open or a
/// path inside a worktree.
#[derive(Clone, Debug, PartialEq)]
enum SearchMatchCandidate {
/// An open buffer.
OpenBuffer {
buffer: Model<Buffer>,
// This might be an unnamed file without representation on filesystem
path: Option<Arc<Path>>,
},
/// A worktree entry identified by worktree id and path.
Path {
worktree_id: WorktreeId,
// Copied from the worktree entry's `is_ignored` flag when the candidate is built.
is_ignored: bool,
// Whether the originating worktree entry is a file; used when ordering candidates.
is_file: bool,
path: Arc<Path>,
},
}
/// An item delivered on the channel returned by a project search.
pub enum SearchResult {
/// The matching ranges found in one buffer.
Buffer {
buffer: Model<Buffer>,
ranges: Vec<Range<Anchor>>,
},
/// Signals that a search limit was hit.
LimitReached,
}
#[cfg(any(test, feature = "test-support"))]
pub const DEFAULT_COMPLETION_CONTEXT: CompletionContext = CompletionContext {
trigger_kind: lsp::CompletionTriggerKind::INVOKED,
@@ -1946,6 +1920,14 @@ impl Project {
}
}
/// Returns `true` only when `is_local_or_ssh()` holds and the project has no SSH session.
pub fn is_local(&self) -> bool {
    if !self.is_local_or_ssh() {
        return false;
    }
    self.ssh_session.is_none()
}
/// Returns `true` when this project holds an SSH session.
pub fn is_via_ssh(&self) -> bool {
self.ssh_session.is_some()
}
/// Returns `true` when the project is neither local nor reached over SSH,
/// i.e. the negation of `is_local_or_ssh()`.
pub fn is_via_collab(&self) -> bool {
!self.is_local_or_ssh()
}
@@ -7242,176 +7224,106 @@ impl Project {
}
}
#[allow(clippy::type_complexity)]
pub fn search(
&self,
query: SearchQuery,
cx: &mut ModelContext<Self>,
) -> Receiver<SearchResult> {
if self.is_local_or_ssh() {
self.search_local(query, cx)
} else if let Some(project_id) = self.remote_id() {
let (tx, rx) = smol::channel::unbounded();
return self.search_local_or_ssh(query, cx);
}
let (tx, rx) = smol::channel::unbounded();
if let Some(project_id) = self.remote_id() {
let request = self.client.request(query.to_proto(project_id));
cx.spawn(move |this, mut cx| async move {
cx.spawn(move |this, cx| async move {
let response = request.await?;
let mut result = HashMap::default();
for location in response.locations {
let buffer_id = BufferId::new(location.buffer_id)?;
let target_buffer = this
.update(&mut cx, |this, cx| {
this.wait_for_remote_buffer(buffer_id, cx)
})?
.await?;
let start = location
.start
.and_then(deserialize_anchor)
.ok_or_else(|| anyhow!("missing target start"))?;
let end = location
.end
.and_then(deserialize_anchor)
.ok_or_else(|| anyhow!("missing target end"))?;
result
.entry(target_buffer)
.or_insert(Vec::new())
.push(start..end)
}
for (buffer, ranges) in result {
let _ = tx.send(SearchResult::Buffer { buffer, ranges }).await;
}
if response.limit_reached {
let _ = tx.send(SearchResult::LimitReached).await;
}
Result::<(), anyhow::Error>::Ok(())
Self::process_search_response(this, response, tx, cx).await
})
.detach_and_log_err(cx);
rx
} else {
unimplemented!();
}
unimplemented!()
};
rx
}
pub fn search_local(
/// Turns a `SearchProjectResponse` into `SearchResult`s and sends them on `tx`.
///
/// Every location is resolved to its buffer (waiting for the remote buffer if needed)
/// and its anchors are deserialized; ranges are then grouped per buffer. One
/// `SearchResult::Buffer` is sent per buffer, followed by `SearchResult::LimitReached`
/// when the response says the limit was reached. Send failures are ignored.
///
/// # Errors
///
/// Fails on an invalid buffer id, when the project model can no longer be updated,
/// when waiting for a buffer fails, or when a location lacks a start or end anchor.
async fn process_search_response(
    this: WeakModel<Self>,
    response: proto::SearchProjectResponse,
    tx: smol::channel::Sender<SearchResult>,
    mut cx: AsyncAppContext,
) -> anyhow::Result<()> {
    let limit_reached = response.limit_reached;

    let mut ranges_by_buffer = HashMap::default();
    for location in response.locations {
        let buffer_id = BufferId::new(location.buffer_id)?;
        let pending_buffer = this.update(&mut cx, |project, cx| {
            project.wait_for_remote_buffer(buffer_id, cx)
        })?;
        let target_buffer = pending_buffer.await?;

        let start = location
            .start
            .and_then(deserialize_anchor)
            .ok_or_else(|| anyhow!("missing target start"))?;
        let end = location
            .end
            .and_then(deserialize_anchor)
            .ok_or_else(|| anyhow!("missing target end"))?;

        ranges_by_buffer
            .entry(target_buffer)
            .or_insert_with(Vec::new)
            .push(start..end);
    }

    // The receiver may already be gone; that is not an error for the sender.
    for (buffer, ranges) in ranges_by_buffer {
        let _ = tx.send(SearchResult::Buffer { buffer, ranges }).await;
    }
    if limit_reached {
        let _ = tx.send(SearchResult::LimitReached).await;
    }

    Ok(())
}
pub fn search_local_or_ssh(
&self,
query: SearchQuery,
cx: &mut ModelContext<Self>,
) -> Receiver<SearchResult> {
// Local search is split into several phases.
// TL;DR is that we do 2 passes; initial pass to pick files which contain at least one match
// and the second phase that finds positions of all the matches found in the candidate files.
// The Receiver obtained from this function returns matches sorted by buffer path. Files without a buffer path are reported first.
//
// It gets a bit hairy though, because we must account for files that do not have a persistent representation
// on FS. Namely, if you have an untitled buffer or unsaved changes in a buffer, we want to scan that too.
//
// 1. We initialize a queue of match candidates and feed all opened buffers into it (== unsaved files / untitled buffers).
// Then, we go through a worktree and check for files that do match a predicate. If the file had an opened version, we skip the scan
// of FS version for that file altogether - after all, what we have in memory is more up-to-date than what's in FS.
// 2. At this point, we have a list of all potentially matching buffers/files.
// We sort that list by buffer path - this list is retained for later use.
// We ensure that all buffers are now opened and available in project.
// 3. We run a scan over all the candidate buffers on multiple background threads.
// We cannot assume that there will even be a match - while at least one match
// is guaranteed for files obtained from FS, the buffers we got from memory (unsaved files/unnamed buffers) might not have a match at all.
// There is also an auxiliary background thread responsible for result gathering.
// This is where the sorted list of buffers comes into play to maintain sorted order; Whenever this background thread receives a notification (buffer has/doesn't have matches),
// it keeps it around. It reports matches in sorted order, though it accepts them in unsorted order as well.
// As soon as the match info on next position in sorted order becomes available, it reports it (if it's a match) or skips to the next
// entry - which might already be available thanks to out-of-order processing.
//
// We could also report matches fully out-of-order, without maintaining a sorted list of matching paths.
// This however would mean that project search (that is the main user of this function) would have to do the sorting itself, on the go.
// This isn't as straightforward as running an insertion sort sadly, and would also mean that it would have to care about maintaining match index
// in face of constantly updating list of sorted matches.
// Meanwhile, this implementation offers index stability, since the matches are already reported in a sorted order.
let snapshots = self
.visible_worktrees(cx)
.filter_map(|tree| {
let tree = tree.read(cx);
Some((tree.snapshot(), tree.as_local()?.settings()))
})
.collect::<Vec<_>>();
let include_root = snapshots.len() > 1;
let background = cx.background_executor().clone();
let path_count: usize = snapshots
.iter()
.map(|(snapshot, _)| {
if query.include_ignored() {
snapshot.file_count()
} else {
snapshot.visible_file_count()
}
})
.sum();
if path_count == 0 {
let (_, rx) = smol::channel::bounded(1024);
return rx;
}
let workers = background.num_cpus().min(path_count);
let (matching_paths_tx, matching_paths_rx) = smol::channel::bounded(1024);
let mut unnamed_files = vec![];
let opened_buffers = self.buffer_store.update(cx, |buffer_store, cx| {
buffer_store
.buffers()
.filter_map(|buffer| {
let (is_ignored, snapshot) = buffer.update(cx, |buffer, cx| {
let is_ignored = buffer
.project_path(cx)
.and_then(|path| self.entry_for_path(&path, cx))
.map_or(false, |entry| entry.is_ignored);
(is_ignored, buffer.snapshot())
});
if is_ignored && !query.include_ignored() {
return None;
} else if let Some(file) = snapshot.file() {
let matched_path = if include_root {
query.file_matches(Some(&file.full_path(cx)))
} else {
query.file_matches(Some(file.path()))
};
if matched_path {
Some((file.path().clone(), (buffer, snapshot)))
} else {
None
}
} else {
unnamed_files.push(buffer);
None
}
})
.collect()
let open_buffers: Vec<_> = self.buffer_store.update(cx, |buffer_store, cx| {
buffer_store.find_seach_candidates(&query, cx)
});
cx.background_executor()
.spawn(Self::background_search(
unnamed_files,
opened_buffers,
cx.background_executor().clone(),
self.fs.clone(),
workers,
query.clone(),
include_root,
path_count,
snapshots,
matching_paths_tx,
))
.detach();
let skip_entries: HashSet<_> = open_buffers
.iter()
.filter_map(|candidate| candidate.entry_id())
.collect();
const MAX_SEARCH_RESULT_FILES: usize = 5_000;
const MAX_SEARCH_RESULT_RANGES: usize = 10_000;
let limit = MAX_SEARCH_RESULT_FILES.saturating_sub(open_buffers.len());
let matching_paths_rx = if self.is_via_ssh() {
self.find_search_candidates_ssh(&query, limit, skip_entries, cx)
} else {
self.worktree_store.update(cx, |worktree_store, cx| {
worktree_store.find_search_candidates(
query.clone(),
skip_entries,
self.fs.clone(),
cx,
)
})
};
let (result_tx, result_rx) = smol::channel::bounded(1024);
let buffer_store = self.buffer_store.clone();
cx.spawn(|_, mut cx| async move {
let mut matching_paths = open_buffers
.into_iter()
.chain(matching_paths_rx.take(limit + 1).collect::<Vec<_>>().await)
.collect::<Vec<_>>();
cx.spawn(|this, mut cx| async move {
const MAX_SEARCH_RESULT_FILES: usize = 5_000;
const MAX_SEARCH_RESULT_RANGES: usize = 10_000;
let mut matching_paths = matching_paths_rx
.take(MAX_SEARCH_RESULT_FILES + 1)
.collect::<Vec<_>>()
.await;
let mut limit_reached = if matching_paths.len() > MAX_SEARCH_RESULT_FILES {
matching_paths.pop();
matching_paths.truncate(MAX_SEARCH_RESULT_FILES);
true
} else {
false
@@ -7421,7 +7333,6 @@ impl Project {
})?;
let mut range_count = 0;
let query = Arc::new(query);
// Now that we know what paths match the query, we will load at most
// 64 buffers at a time to avoid overwhelming the main thread. For each
@@ -7437,8 +7348,14 @@ impl Project {
}
SearchMatchCandidate::Path {
worktree_id, path, ..
} => this.update(&mut cx, |this, cx| {
this.open_buffer((*worktree_id, path.clone()), cx)
} => buffer_store.update(&mut cx, |buffer_store, cx| {
buffer_store.open_buffer(
ProjectPath {
worktree_id: *worktree_id,
path: path.clone(),
},
cx,
)
})?,
};
@@ -7489,93 +7406,41 @@ impl Project {
result_rx
}
/// Pick paths that might potentially contain a match of a given search query.
#[allow(clippy::too_many_arguments)]
async fn background_search(
unnamed_buffers: Vec<Model<Buffer>>,
opened_buffers: HashMap<Arc<Path>, (Model<Buffer>, BufferSnapshot)>,
executor: BackgroundExecutor,
fs: Arc<dyn Fs>,
workers: usize,
query: SearchQuery,
include_root: bool,
path_count: usize,
snapshots: Vec<(Snapshot, WorktreeSettings)>,
matching_paths_tx: Sender<SearchMatchCandidate>,
) {
let fs = &fs;
let query = &query;
let matching_paths_tx = &matching_paths_tx;
let snapshots = &snapshots;
for buffer in unnamed_buffers {
matching_paths_tx
.send(SearchMatchCandidate::OpenBuffer {
buffer: buffer.clone(),
path: None,
})
.await
.log_err();
}
for (path, (buffer, _)) in opened_buffers.iter() {
matching_paths_tx
.send(SearchMatchCandidate::OpenBuffer {
buffer: buffer.clone(),
path: Some(path.clone()),
})
.await
.log_err();
}
pub fn find_search_candidates_ssh(
&self,
query: &SearchQuery,
limit: usize,
skip_entries: HashSet<ProjectEntryId>,
cx: &mut ModelContext<Self>,
) -> Receiver<SearchMatchCandidate> {
// todo: it'd be nice to stream these results.
let response = self
.ssh_session
.clone()
.unwrap()
.request(proto::FindSearchCandidates {
limit: limit as u64,
skip_entries: skip_entries.into_iter().map(|p| p.to_proto()).collect(),
query: Some(query.to_proto(0)),
});
let paths_per_worker = (path_count + workers - 1) / workers;
executor
.scoped(|scope| {
let max_concurrent_workers = Arc::new(Semaphore::new(workers));
for worker_ix in 0..workers {
let worker_start_ix = worker_ix * paths_per_worker;
let worker_end_ix = worker_start_ix + paths_per_worker;
let opened_buffers = opened_buffers.clone();
let limiter = Arc::clone(&max_concurrent_workers);
scope.spawn({
async move {
let _guard = limiter.acquire().await;
search_snapshots(
snapshots,
worker_start_ix,
worker_end_ix,
query,
matching_paths_tx,
&opened_buffers,
include_root,
fs,
)
.await;
}
});
let (matching_paths_tx, matching_paths_rx) = smol::channel::bounded(1024);
cx.background_executor()
.spawn(async move {
let response = response.await?;
for result in response.results {
matching_paths_tx
.send(SearchMatchCandidate::Path {
worktree_id: WorktreeId::from_proto(result.worktree_id),
path: PathBuf::from(result.path).into(),
})
.await?;
}
if query.include_ignored() {
for (snapshot, settings) in snapshots {
for ignored_entry in snapshot.entries(true, 0).filter(|e| e.is_ignored) {
let limiter = Arc::clone(&max_concurrent_workers);
scope.spawn(async move {
let _guard = limiter.acquire().await;
search_ignored_entry(
snapshot,
settings,
ignored_entry,
fs,
query,
matching_paths_tx,
)
.await;
});
}
}
}
anyhow::Ok(())
})
.await;
.detach_and_log_err(cx);
matching_paths_rx
}
pub fn request_lsp<R: LspCommand>(
@@ -10916,157 +10781,6 @@ fn deserialize_code_actions(code_actions: &HashMap<String, bool>) -> Vec<lsp::Co
.collect()
}
/// Scans one worker's share of the snapshot files and sends every file that passes the
/// query's path filters and whose on-disk content `query.detect` reports as matching.
///
/// The files of all `snapshots` are treated as one contiguous index space; this call
/// handles the indices in `worker_start_ix..worker_end_ix`. Paths present in
/// `opened_buffers` are skipped. With `include_root`, the path filters are applied to
/// the path prefixed with the snapshot's root name.
///
/// Returns early once sending on `results_tx` fails.
// The argument list mirrors the worker state handed over by the caller.
#[allow(clippy::too_many_arguments)]
async fn search_snapshots(
    snapshots: &[(Snapshot, WorktreeSettings)],
    worker_start_ix: usize,
    worker_end_ix: usize,
    query: &SearchQuery,
    results_tx: &Sender<SearchMatchCandidate>,
    opened_buffers: &HashMap<Arc<Path>, (Model<Buffer>, BufferSnapshot)>,
    include_root: bool,
    fs: &Arc<dyn Fs>,
) {
    let mut snapshot_start_ix = 0;
    // Reused across entries to avoid allocating a new path buffer per file.
    let mut abs_path = PathBuf::new();

    for (snapshot, _) in snapshots {
        let snapshot_file_count = if query.include_ignored() {
            snapshot.file_count()
        } else {
            snapshot.visible_file_count()
        };
        let snapshot_end_ix = snapshot_start_ix + snapshot_file_count;

        if worker_end_ix <= snapshot_start_ix {
            // This worker's range ends before this snapshot begins.
            break;
        }
        if worker_start_ix > snapshot_end_ix {
            // This worker's range starts after this snapshot ends.
            snapshot_start_ix = snapshot_end_ix;
            continue;
        }

        let start_in_snapshot = worker_start_ix.saturating_sub(snapshot_start_ix);
        let end_in_snapshot = cmp::min(worker_end_ix, snapshot_end_ix) - snapshot_start_ix;

        for entry in snapshot
            .files(false, start_in_snapshot)
            .take(end_in_snapshot - start_in_snapshot)
        {
            if results_tx.is_closed() {
                break;
            }
            if opened_buffers.contains_key(&entry.path) {
                continue;
            }

            let matched_path = if include_root {
                let mut full_path = PathBuf::from(snapshot.root_name());
                full_path.push(&entry.path);
                query.file_matches(Some(&full_path))
            } else {
                query.file_matches(Some(&entry.path))
            };
            if !matched_path {
                continue;
            }

            abs_path.clear();
            abs_path.push(&snapshot.abs_path());
            abs_path.push(&entry.path);
            // A file that cannot be opened or scanned counts as "no match".
            let matches = fs
                .open_sync(&abs_path)
                .await
                .log_err()
                .map_or(false, |file| query.detect(file).unwrap_or(false));
            if !matches {
                continue;
            }

            let project_path = SearchMatchCandidate::Path {
                worktree_id: snapshot.id(),
                path: entry.path.clone(),
                is_ignored: entry.is_ignored,
                is_file: entry.is_file(),
            };
            if results_tx.send(project_path).await.is_err() {
                return;
            }
        }

        snapshot_start_ix = snapshot_end_ix;
    }
}
/// Walks the filesystem below one ignored worktree entry, breadth-first, and sends every
/// regular file that passes the query's path filters and whose content `query.detect`
/// reports as matching.
///
/// Symlinks are not followed. Filesystem errors are logged and the affected path is
/// skipped. Returns early once sending on `counter_tx` fails.
async fn search_ignored_entry(
    snapshot: &Snapshot,
    settings: &WorktreeSettings,
    ignored_entry: &Entry,
    fs: &Arc<dyn Fs>,
    query: &SearchQuery,
    counter_tx: &Sender<SearchMatchCandidate>,
) {
    let mut pending = VecDeque::new();
    pending.push_back(snapshot.abs_path().join(&ignored_entry.path));

    while let Some(ignored_abs_path) = pending.pop_front() {
        let metadata = fs
            .metadata(&ignored_abs_path)
            .await
            .with_context(|| format!("fetching fs metadata for {ignored_abs_path:?}"))
            .log_err()
            .flatten();
        let fs_metadata = match metadata {
            Some(fs_metadata) => fs_metadata,
            None => continue,
        };

        if fs_metadata.is_dir {
            // Queue the directory's children for a later iteration.
            let listing = fs
                .read_dir(&ignored_abs_path)
                .await
                .with_context(|| format!("listing ignored path {ignored_abs_path:?}"))
                .log_err();
            if let Some(mut subfiles) = listing {
                while let Some(subfile) = subfiles.next().await {
                    pending.extend(subfile.log_err());
                }
            }
            continue;
        }
        if fs_metadata.is_symlink {
            continue;
        }

        if !query.file_matches(Some(&ignored_abs_path))
            || settings.is_path_excluded(&ignored_entry.path)
        {
            continue;
        }

        let opened = fs
            .open_sync(&ignored_abs_path)
            .await
            .with_context(|| format!("Opening ignored path {ignored_abs_path:?}"))
            .log_err();
        let matches = opened.map_or(false, |file| query.detect(file).unwrap_or(false));
        if !matches {
            continue;
        }

        let relative_path = ignored_abs_path
            .strip_prefix(snapshot.abs_path())
            .expect("scanning worktree-related files");
        let project_path = SearchMatchCandidate::Path {
            worktree_id: snapshot.id(),
            path: Arc::from(relative_path),
            is_ignored: true,
            is_file: ignored_entry.is_file(),
        };
        if counter_tx.send(project_path).await.is_err() {
            return;
        }
    }
}
fn glob_literal_prefix(glob: &str) -> &str {
let mut literal_end = 0;
for (i, part) in glob.split(path::MAIN_SEPARATOR).enumerate() {
@@ -11651,75 +11365,3 @@ pub fn sort_worktree_entries(entries: &mut Vec<Entry>) {
)
});
}
/// Sorts search candidates in place.
///
/// Open buffers without a path come first, ordered by their remote id. All other
/// candidates are ordered with `compare_paths`; open buffers are always treated as
/// files, `Path` candidates use their own `is_file` flag. Two `Path` candidates are
/// compared by worktree id before their paths.
fn sort_search_matches(search_matches: &mut Vec<SearchMatchCandidate>, cx: &AppContext) {
    search_matches.sort_by(|lhs, rhs| match (lhs, rhs) {
        // Two path-less buffers: fall back to the buffer ids.
        (
            SearchMatchCandidate::OpenBuffer {
                buffer: lhs_buffer,
                path: None,
            },
            SearchMatchCandidate::OpenBuffer {
                buffer: rhs_buffer,
                path: None,
            },
        ) => {
            let lhs_id = lhs_buffer.read(cx).remote_id();
            let rhs_id = rhs_buffer.read(cx).remote_id();
            lhs_id.cmp(&rhs_id)
        }
        // A path-less buffer sorts before anything that has a path.
        (SearchMatchCandidate::OpenBuffer { path: None, .. }, _) => Ordering::Less,
        (_, SearchMatchCandidate::OpenBuffer { path: None, .. }) => Ordering::Greater,
        // Two worktree paths: worktree first, then path.
        (
            SearchMatchCandidate::Path {
                worktree_id: lhs_worktree,
                is_file: lhs_is_file,
                path: lhs_path,
                ..
            },
            SearchMatchCandidate::Path {
                worktree_id: rhs_worktree,
                is_file: rhs_is_file,
                path: rhs_path,
                ..
            },
        ) => lhs_worktree.cmp(rhs_worktree).then_with(|| {
            compare_paths(
                (lhs_path.as_ref(), *lhs_is_file),
                (rhs_path.as_ref(), *rhs_is_file),
            )
        }),
        // Mixed or buffer-only comparisons: path only.
        (
            SearchMatchCandidate::OpenBuffer {
                path: Some(lhs_path),
                ..
            },
            SearchMatchCandidate::Path {
                is_file: rhs_is_file,
                path: rhs_path,
                ..
            },
        ) => compare_paths((lhs_path.as_ref(), true), (rhs_path.as_ref(), *rhs_is_file)),
        (
            SearchMatchCandidate::Path {
                is_file: lhs_is_file,
                path: lhs_path,
                ..
            },
            SearchMatchCandidate::OpenBuffer {
                path: Some(rhs_path),
                ..
            },
        ) => compare_paths((lhs_path.as_ref(), *lhs_is_file), (rhs_path.as_ref(), true)),
        (
            SearchMatchCandidate::OpenBuffer {
                path: Some(lhs_path),
                ..
            },
            SearchMatchCandidate::OpenBuffer {
                path: Some(rhs_path),
                ..
            },
        ) => compare_paths((lhs_path.as_ref(), true), (rhs_path.as_ref(), true)),
    });
}

View File

@@ -1,17 +1,21 @@
use aho_corasick::{AhoCorasick, AhoCorasickBuilder};
use anyhow::Result;
use client::proto;
use language::{char_kind, BufferSnapshot};
use gpui::{AppContext, Model};
use language::{char_kind, proto::serialize_anchor, Buffer, BufferSnapshot};
use regex::{Captures, Regex, RegexBuilder};
use smol::future::yield_now;
use std::{
borrow::Cow,
cmp::Ordering,
io::{BufRead, BufReader, Read},
ops::Range,
path::Path,
sync::{Arc, OnceLock},
};
use util::paths::PathMatcher;
use text::Anchor;
use util::paths::{compare_paths, PathMatcher};
use worktree::{ProjectEntryId, WorktreeId};
static TEXT_REPLACEMENT_SPECIAL_CHARACTERS_REGEX: OnceLock<Regex> = OnceLock::new();
@@ -459,3 +463,104 @@ mod tests {
}
}
}
/// A candidate for a project search: either a buffer that is already open or a
/// path inside a worktree.
#[derive(Clone, Debug, PartialEq)]
pub enum SearchMatchCandidate {
/// An open buffer.
OpenBuffer {
buffer: Model<Buffer>,
// The project entry id reported by the buffer, if it has one.
entry_id: Option<ProjectEntryId>,
// This might be an unnamed file without representation on filesystem
path: Option<Arc<Path>>,
},
/// A file identified by worktree id and path.
Path {
worktree_id: WorktreeId,
path: Arc<Path>,
},
}
impl SearchMatchCandidate {
    /// Returns the project entry id recorded for an open buffer.
    ///
    /// `Path` candidates carry no entry id, so they always yield `None`.
    pub fn entry_id(&self) -> Option<ProjectEntryId> {
        match self {
            Self::OpenBuffer { entry_id, .. } => *entry_id,
            // Spelled out instead of `_` so that adding a variant forces this
            // match to be revisited.
            Self::Path { .. } => None,
        }
    }
}
/// An item delivered on the channel returned by a project search.
pub enum SearchResult {
/// The matching ranges found in one buffer.
Buffer {
buffer: Model<Buffer>,
ranges: Vec<Range<Anchor>>,
},
/// Signals that a search limit was hit.
LimitReached,
}
impl SearchResult {
    /// Converts an anchor range into its protobuf form by serializing both endpoints.
    pub fn serialize_range(range: &Range<Anchor>) -> Range<proto::Anchor> {
        let Range { start, end } = range;
        let start = serialize_anchor(start);
        let end = serialize_anchor(end);
        start..end
    }
}
pub(crate) fn sort_search_matches(search_matches: &mut Vec<SearchMatchCandidate>, cx: &AppContext) {
search_matches.sort_by(|entry_a, entry_b| match (entry_a, entry_b) {
(
SearchMatchCandidate::OpenBuffer {
buffer: buffer_a,
path: None,
..
},
SearchMatchCandidate::OpenBuffer {
buffer: buffer_b,
path: None,
..
},
) => buffer_a
.read(cx)
.remote_id()
.cmp(&buffer_b.read(cx).remote_id()),
(
SearchMatchCandidate::OpenBuffer { path: None, .. },
SearchMatchCandidate::Path { .. }
| SearchMatchCandidate::OpenBuffer { path: Some(_), .. },
) => Ordering::Less,
(
SearchMatchCandidate::OpenBuffer { path: Some(_), .. }
| SearchMatchCandidate::Path { .. },
SearchMatchCandidate::OpenBuffer { path: None, .. },
) => Ordering::Greater,
(
SearchMatchCandidate::OpenBuffer {
path: Some(path_a), ..
},
SearchMatchCandidate::Path { path: path_b, .. },
) => compare_paths((path_a.as_ref(), true), (path_b.as_ref(), true)),
(
SearchMatchCandidate::Path { path: path_a, .. },
SearchMatchCandidate::OpenBuffer {
path: Some(path_b), ..
},
) => compare_paths((path_a.as_ref(), true), (path_b.as_ref(), true)),
(
SearchMatchCandidate::OpenBuffer {
path: Some(path_a), ..
},
SearchMatchCandidate::OpenBuffer {
path: Some(path_b), ..
},
) => compare_paths((path_a.as_ref(), true), (path_b.as_ref(), true)),
(
SearchMatchCandidate::Path {
worktree_id: worktree_id_a,
path: path_a,
..
},
SearchMatchCandidate::Path {
worktree_id: worktree_id_b,
path: path_b,
..
},
) => worktree_id_a
.cmp(&worktree_id_b)
.then_with(|| compare_paths((path_a.as_ref(), true), (path_b.as_ref(), true))),
});
}

View File

@@ -1,12 +1,23 @@
use std::{cmp, collections::VecDeque, path::PathBuf, sync::Arc};
use anyhow::{anyhow, Context as _, Result};
use collections::HashMap;
use collections::{HashMap, HashSet};
use fs::Fs;
use gpui::{AppContext, AsyncAppContext, EntityId, EventEmitter, Model, ModelContext, WeakModel};
use rpc::{
proto::{self, AnyProtoClient},
TypedEnvelope,
};
use smol::{
channel::{Receiver, Sender},
lock::Semaphore,
stream::StreamExt,
};
use text::ReplicaId;
use worktree::{ProjectEntryId, Worktree, WorktreeId};
use util::ResultExt;
use worktree::{Entry, ProjectEntryId, Snapshot, Worktree, WorktreeId, WorktreeSettings};
use crate::search::{SearchMatchCandidate, SearchQuery};
pub struct WorktreeStore {
is_shared: bool,
@@ -61,6 +72,15 @@ impl WorktreeStore {
.find(|worktree| worktree.read(cx).contains_entry(entry_id))
}
/// Looks up `entry_id` in each worktree in turn and returns the first entry found,
/// or `None` when no worktree has it.
pub fn entry_for_id<'a>(
    &'a self,
    entry_id: ProjectEntryId,
    cx: &'a AppContext,
) -> Option<&'a Entry> {
    for worktree in self.worktrees() {
        if let Some(entry) = worktree.read(cx).entry_for_id(entry_id) {
            return Some(entry);
        }
    }
    None
}
pub fn add(&mut self, worktree: &Model<Worktree>, cx: &mut ModelContext<Self>) {
let push_strong_handle = self.is_shared || worktree.read(cx).is_visible();
let handle = if push_strong_handle {
@@ -238,6 +258,251 @@ impl WorktreeStore {
}
}
/// Searches over all visible local worktrees (ignoring open buffers).
///
/// The query is tested against the files on disk and matching files are sent on the
/// returned channel. Entries listed in `skip_entries` are not scanned. Worktrees for
/// which `as_local()` yields nothing are left out.
///
/// The work is split across up to `num_cpus` background workers, each taking a
/// contiguous share of the file index space. When the query includes ignored files,
/// every ignored entry is additionally walked on disk.
///
/// The returned receiver yields nothing when there are no files to search.
pub fn find_search_candidates(
    &self,
    query: SearchQuery,
    skip_entries: HashSet<ProjectEntryId>,
    fs: Arc<dyn Fs>,
    cx: &ModelContext<Self>,
) -> Receiver<SearchMatchCandidate> {
    let (matching_paths_tx, matching_paths_rx) = smol::channel::bounded(1024);
    let snapshots = self
        .visible_worktrees(cx)
        .filter_map(|tree| {
            let tree = tree.read(cx);
            Some((tree.snapshot(), tree.as_local()?.settings()))
        })
        .collect::<Vec<_>>();
    let include_root = snapshots.len() > 1;
    let path_count: usize = snapshots
        .iter()
        .map(|(snapshot, _)| {
            if query.include_ignored() {
                snapshot.file_count()
            } else {
                snapshot.visible_file_count()
            }
        })
        .sum();
    if path_count == 0 {
        return matching_paths_rx;
    }

    let workers = cx.background_executor().num_cpus().min(path_count);
    let paths_per_worker = (path_count + workers - 1) / workers;
    let executor = cx.background_executor().clone();
    cx.background_executor()
        .spawn(async move {
            // Everything the workers need is owned by this task and outlives the scope
            // below, so the workers share it by reference instead of each receiving
            // their own clone of the snapshots and of the skip set.
            let fs = &fs;
            let query = &query;
            let matching_paths_tx = &matching_paths_tx;
            let snapshots = &snapshots;
            let skip_entries = &skip_entries;

            executor
                .scoped(move |scope| {
                    let max_concurrent_workers = Arc::new(Semaphore::new(workers));

                    for worker_ix in 0..workers {
                        let worker_start_ix = worker_ix * paths_per_worker;
                        let worker_end_ix = worker_start_ix + paths_per_worker;
                        let limiter = Arc::clone(&max_concurrent_workers);
                        scope.spawn(async move {
                            let _guard = limiter.acquire().await;
                            Self::search_snapshots(
                                snapshots,
                                worker_start_ix,
                                worker_end_ix,
                                query,
                                matching_paths_tx,
                                skip_entries,
                                include_root,
                                fs,
                            )
                            .await;
                        });
                    }

                    if query.include_ignored() {
                        for (snapshot, settings) in snapshots {
                            for ignored_entry in
                                snapshot.entries(true, 0).filter(|e| e.is_ignored)
                            {
                                let limiter = Arc::clone(&max_concurrent_workers);
                                scope.spawn(async move {
                                    let _guard = limiter.acquire().await;
                                    Self::search_ignored_entry(
                                        snapshot,
                                        settings,
                                        ignored_entry,
                                        fs,
                                        query,
                                        matching_paths_tx,
                                    )
                                    .await;
                                });
                            }
                        }
                    }
                })
                .await
        })
        .detach();

    matching_paths_rx
}
/// Scans one worker's share of the snapshot files and sends every file that passes the
/// query's path filters and whose on-disk content `query.detect` reports as matching.
///
/// The files of all `snapshots` are treated as one contiguous index space; this call
/// handles the indices in `worker_start_ix..worker_end_ix`. Entries whose id is in
/// `skip_entries` are skipped. With `include_root`, the path filters are applied to the
/// path prefixed with the snapshot's root name.
///
/// Returns early once sending on `results_tx` fails.
// The argument list mirrors the worker state handed over by the caller.
#[allow(clippy::too_many_arguments)]
async fn search_snapshots(
    snapshots: &[(worktree::Snapshot, WorktreeSettings)],
    worker_start_ix: usize,
    worker_end_ix: usize,
    query: &SearchQuery,
    results_tx: &Sender<SearchMatchCandidate>,
    skip_entries: &HashSet<ProjectEntryId>,
    include_root: bool,
    fs: &Arc<dyn Fs>,
) {
    let mut snapshot_start_ix = 0;
    // Reused across entries to avoid allocating a new path buffer per file.
    let mut abs_path = PathBuf::new();

    for (snapshot, _) in snapshots {
        let snapshot_file_count = if query.include_ignored() {
            snapshot.file_count()
        } else {
            snapshot.visible_file_count()
        };
        let snapshot_end_ix = snapshot_start_ix + snapshot_file_count;

        if worker_end_ix <= snapshot_start_ix {
            // This worker's range ends before this snapshot begins.
            break;
        }
        if worker_start_ix > snapshot_end_ix {
            // This worker's range starts after this snapshot ends.
            snapshot_start_ix = snapshot_end_ix;
            continue;
        }

        let start_in_snapshot = worker_start_ix.saturating_sub(snapshot_start_ix);
        let end_in_snapshot = cmp::min(worker_end_ix, snapshot_end_ix) - snapshot_start_ix;

        for entry in snapshot
            .files(false, start_in_snapshot)
            .take(end_in_snapshot - start_in_snapshot)
        {
            if results_tx.is_closed() {
                break;
            }
            if skip_entries.contains(&entry.id) {
                continue;
            }

            let matched_path = if include_root {
                let mut full_path = PathBuf::from(snapshot.root_name());
                full_path.push(&entry.path);
                query.file_matches(Some(&full_path))
            } else {
                query.file_matches(Some(&entry.path))
            };
            if !matched_path {
                continue;
            }

            abs_path.clear();
            abs_path.push(&snapshot.abs_path());
            abs_path.push(&entry.path);
            // A file that cannot be opened or scanned counts as "no match".
            let matches = fs
                .open_sync(&abs_path)
                .await
                .log_err()
                .map_or(false, |file| query.detect(file).unwrap_or(false));
            if !matches {
                continue;
            }

            let project_path = SearchMatchCandidate::Path {
                worktree_id: snapshot.id(),
                path: entry.path.clone(),
            };
            if results_tx.send(project_path).await.is_err() {
                return;
            }
        }

        snapshot_start_ix = snapshot_end_ix;
    }
}
/// Walks the filesystem below one ignored worktree entry, breadth-first, and sends every
/// regular file that passes the query's path filters and whose content `query.detect`
/// reports as matching.
///
/// Symlinks are not followed. Filesystem errors are logged and the affected path is
/// skipped. Returns early once sending on `counter_tx` fails.
async fn search_ignored_entry(
    snapshot: &Snapshot,
    settings: &WorktreeSettings,
    ignored_entry: &Entry,
    fs: &Arc<dyn Fs>,
    query: &SearchQuery,
    counter_tx: &Sender<SearchMatchCandidate>,
) {
    let mut pending = VecDeque::new();
    pending.push_back(snapshot.abs_path().join(&ignored_entry.path));

    while let Some(ignored_abs_path) = pending.pop_front() {
        let metadata = fs
            .metadata(&ignored_abs_path)
            .await
            .with_context(|| format!("fetching fs metadata for {ignored_abs_path:?}"))
            .log_err()
            .flatten();
        let fs_metadata = match metadata {
            Some(fs_metadata) => fs_metadata,
            None => continue,
        };

        if fs_metadata.is_dir {
            // Queue the directory's children for a later iteration.
            let listing = fs
                .read_dir(&ignored_abs_path)
                .await
                .with_context(|| format!("listing ignored path {ignored_abs_path:?}"))
                .log_err();
            if let Some(mut subfiles) = listing {
                while let Some(subfile) = subfiles.next().await {
                    pending.extend(subfile.log_err());
                }
            }
            continue;
        }
        if fs_metadata.is_symlink {
            continue;
        }

        if !query.file_matches(Some(&ignored_abs_path))
            || settings.is_path_excluded(&ignored_entry.path)
        {
            continue;
        }

        let opened = fs
            .open_sync(&ignored_abs_path)
            .await
            .with_context(|| format!("Opening ignored path {ignored_abs_path:?}"))
            .log_err();
        let matches = opened.map_or(false, |file| query.detect(file).unwrap_or(false));
        if !matches {
            continue;
        }

        let relative_path = ignored_abs_path
            .strip_prefix(snapshot.abs_path())
            .expect("scanning worktree-related files");
        let project_path = SearchMatchCandidate::Path {
            worktree_id: snapshot.id(),
            path: Arc::from(relative_path),
        };
        if counter_tx.send(project_path).await.is_err() {
            return;
        }
    }
}
pub async fn handle_create_project_entry(
this: Model<Self>,
envelope: TypedEnvelope<proto::CreateProjectEntry>,

View File

@@ -275,7 +275,10 @@ message Envelope {
GetLlmTokenResponse get_llm_token_response = 236;
LspExtSwitchSourceHeader lsp_ext_switch_source_header = 241;
LspExtSwitchSourceHeaderResponse lsp_ext_switch_source_header_response = 242; // current max
LspExtSwitchSourceHeaderResponse lsp_ext_switch_source_header_response = 242;
FindSearchCandidates find_search_candidates = 243;
FindSearchCandidatesResponse find_search_candidates_response = 244; // current max
}
reserved 158 to 161;
@@ -1236,6 +1239,16 @@ message SearchProjectResponse {
bool limit_reached = 2;
}
// Request asking the receiving side to look for paths that are candidates
// for a project search.
message FindSearchCandidates {
// The search to perform. The handler rejects requests where this is unset.
SearchProject query = 1;
// Project entry ids that the worktree scan should skip.
repeated uint64 skip_entries = 2;
// Maximum number of candidates the handler takes from the result stream.
uint64 limit = 3;
}
// Response to `FindSearchCandidates`.
message FindSearchCandidatesResponse {
// Worktree id and path of each candidate that was found.
repeated ProjectPath results = 1;
}
message CodeAction {
uint64 server_id = 1;
Anchor start = 2;

View File

@@ -410,6 +410,8 @@ messages!(
(LspExtSwitchSourceHeaderResponse, Background),
(AddWorktree, Foreground),
(AddWorktreeResponse, Foreground),
(FindSearchCandidates, Foreground),
(FindSearchCandidatesResponse, Foreground)
);
request_messages!(
@@ -532,6 +534,7 @@ request_messages!(
(SynchronizeContexts, SynchronizeContextsResponse),
(LspExtSwitchSourceHeader, LspExtSwitchSourceHeaderResponse),
(AddWorktree, AddWorktreeResponse),
(FindSearchCandidates, FindSearchCandidatesResponse),
);
entity_messages!(

View File

@@ -1,10 +1,12 @@
use anyhow::Result;
use anyhow::{anyhow, Result};
use fs::Fs;
use futures::StreamExt;
use gpui::{AppContext, AsyncAppContext, Context, Model, ModelContext};
use project::{
buffer_store::{BufferStore, BufferStoreEvent},
search::{SearchMatchCandidate, SearchQuery},
worktree_store::WorktreeStore,
ProjectPath, WorktreeId, WorktreeSettings,
ProjectEntryId, ProjectPath, WorktreeId, WorktreeSettings,
};
use remote::SshSession;
use rpc::{
@@ -12,7 +14,6 @@ use rpc::{
TypedEnvelope,
};
use settings::{Settings as _, SettingsStore};
use smol::stream::StreamExt;
use std::{
path::{Path, PathBuf},
sync::{atomic::AtomicUsize, Arc},
@@ -49,6 +50,7 @@ impl HeadlessProject {
session.add_request_handler(this.clone(), Self::handle_list_remote_directory);
session.add_request_handler(this.clone(), Self::handle_add_worktree);
session.add_request_handler(this.clone(), Self::handle_open_buffer_by_path);
session.add_request_handler(this.clone(), Self::handle_find_search_candidates);
session.add_request_handler(buffer_store.downgrade(), BufferStore::handle_blame_buffer);
session.add_request_handler(buffer_store.downgrade(), BufferStore::handle_update_buffer);
@@ -178,6 +180,46 @@ impl HeadlessProject {
Ok(proto::ListRemoteDirectoryResponse { entries })
}
/// Handles a `FindSearchCandidates` request by searching this project's
/// worktree store.
///
/// The query is decoded from the request, the ids in `skip_entries` are handed
/// to the worktree store, and at most `limit` candidates are taken from the
/// resulting stream and returned as `proto::ProjectPath`s.
///
/// # Errors
///
/// Returns an error if the request carries no query, if the query cannot be
/// decoded, or if updating the model fails.
///
/// # Panics
///
/// Panics if the worktree store yields a candidate that is not a
/// `SearchMatchCandidate::Path`.
pub async fn handle_find_search_candidates(
    this: Model<Self>,
    envelope: TypedEnvelope<proto::FindSearchCandidates>,
    mut cx: AsyncAppContext,
) -> Result<proto::FindSearchCandidatesResponse> {
    let query =
        SearchQuery::from_proto(envelope.payload.query.ok_or_else(|| anyhow!("no query"))?)?;
    // `limit` travels as a u64. Saturate instead of silently truncating on
    // targets where `usize` is narrower than 64 bits.
    let limit = usize::try_from(envelope.payload.limit).unwrap_or(usize::MAX);
    let skip_entries = envelope
        .payload
        .skip_entries
        .into_iter()
        .map(ProjectEntryId::from_proto)
        .collect();

    let rx = this.update(&mut cx, |this, cx| {
        let fs = this.fs.clone();
        this.worktree_store.update(cx, |worktree_store, cx| {
            worktree_store.find_search_candidates(query, skip_entries, fs, cx)
        })
    })?;

    let results = rx
        .take(limit)
        .map(|candidate| {
            let SearchMatchCandidate::Path { path, worktree_id } = candidate else {
                unreachable!("worktree search produced a non-path candidate")
            };
            proto::ProjectPath {
                worktree_id: worktree_id.to_proto(),
                path: path.to_string_lossy().into_owned(),
            }
        })
        .collect::<Vec<_>>()
        .await;

    Ok(proto::FindSearchCandidatesResponse { results })
}
pub fn on_buffer_store_event(
&mut self,
_: Model<BufferStore>,

View File

@@ -1,55 +1,24 @@
use crate::headless_project::HeadlessProject;
use client::{Client, UserStore};
use clock::FakeSystemClock;
use fs::{FakeFs, Fs as _};
use fs::{FakeFs, Fs};
use gpui::{Context, Model, TestAppContext};
use http_client::FakeHttpClient;
use language::LanguageRegistry;
use node_runtime::FakeNodeRuntime;
use project::Project;
use project::{
search::{SearchQuery, SearchResult},
Project,
};
use remote::SshSession;
use serde_json::json;
use settings::SettingsStore;
use smol::stream::StreamExt;
use std::{path::Path, sync::Arc};
/// Initializes `env_logger`, but only when the `RUST_LOG` environment
/// variable is set.
fn init_logger() {
if std::env::var("RUST_LOG").is_ok() {
// `.ok()` discards the `Result` of `try_init`, so a failed init is ignored.
env_logger::try_init().ok();
}
}
#[gpui::test]
async fn test_remote_editing(cx: &mut TestAppContext, server_cx: &mut TestAppContext) {
let (client_ssh, server_ssh) = SshSession::fake(cx, server_cx);
init_logger();
let fs = FakeFs::new(server_cx.executor());
fs.insert_tree(
"/code",
json!({
"project1": {
".git": {},
"README.md": "# project 1",
"src": {
"lib.rs": "fn one() -> usize { 1 }"
}
},
"project2": {
"README.md": "# project 2",
},
}),
)
.await;
fs.set_index_for_repo(
Path::new("/code/project1/.git"),
&[(Path::new("src/lib.rs"), "fn one() -> usize { 0 }".into())],
);
server_cx.update(HeadlessProject::init);
let _headless_project =
server_cx.new_model(|cx| HeadlessProject::new(server_ssh, fs.clone(), cx));
let project = build_project(client_ssh, cx);
let (project, _headless, fs) = init_test(cx, server_cx).await;
let (worktree, _) = project
.update(cx, |project, cx| {
project.find_or_create_worktree("/code/project1", true, cx)
@@ -150,6 +119,96 @@ async fn test_remote_editing(cx: &mut TestAppContext, server_cx: &mut TestAppCon
});
}
/// Runs a text search for "project" through a project built by `init_test`
/// and expects exactly one result: the buffer for `project1/README.md`.
#[gpui::test]
async fn test_remote_project_search(cx: &mut TestAppContext, server_cx: &mut TestAppContext) {
    let (project, _, _) = init_test(cx, server_cx).await;

    // Add the worktree and let all pending work settle before searching.
    let add_worktree = project.update(cx, |project, cx| {
        project.find_or_create_worktree("/code/project1", true, cx)
    });
    add_worktree.await.unwrap();
    cx.run_until_parked();

    let mut results = project.update(cx, |project, cx| {
        let query = SearchQuery::text(
            "project",
            false,
            true,
            false,
            Default::default(),
            Default::default(),
        )
        .unwrap();
        project.search(query, cx)
    });

    let first = results.next().await.unwrap();
    let buffer = match first {
        SearchResult::Buffer { buffer, .. } => buffer,
        _ => panic!("incorrect result"),
    };
    buffer.update(cx, |buffer, cx| {
        let full_path = buffer.file().unwrap().full_path(cx);
        assert_eq!(full_path.to_string_lossy(), "project1/README.md")
    });

    // The stream must end after the single match.
    assert!(results.next().await.is_none());
}
/// Turns on `env_logger` output when `RUST_LOG` is present in the environment;
/// otherwise does nothing.
fn init_logger() {
    if std::env::var("RUST_LOG").is_err() {
        return;
    }
    // The outcome of `try_init` is intentionally ignored.
    let _ = env_logger::try_init();
}
/// Builds the shared fixture for these tests: a fake SSH session pair, a fake
/// filesystem populated with two small projects under `/code`, a
/// `HeadlessProject` on the server context, and a `Project` built from the
/// client end of the session.
///
/// Returns the project, the headless project, and the fake filesystem.
async fn init_test(
cx: &mut TestAppContext,
server_cx: &mut TestAppContext,
) -> (Model<Project>, Model<HeadlessProject>, Arc<FakeFs>) {
let (client_ssh, server_ssh) = SshSession::fake(cx, server_cx);
init_logger();
// The fake filesystem runs on the server context's executor.
let fs = FakeFs::new(server_cx.executor());
fs.insert_tree(
"/code",
json!({
"project1": {
".git": {},
"README.md": "# project 1",
"src": {
"lib.rs": "fn one() -> usize { 1 }"
}
},
"project2": {
"README.md": "# project 2",
},
}),
)
.await;
// The repository index holds a different version of `src/lib.rs` (`{ 0 }`)
// than the file inserted into the tree above (`{ 1 }`).
fs.set_index_for_repo(
Path::new("/code/project1/.git"),
&[(Path::new("src/lib.rs"), "fn one() -> usize { 0 }".into())],
);
server_cx.update(HeadlessProject::init);
let headless = server_cx.new_model(|cx| HeadlessProject::new(server_ssh, fs.clone(), cx));
let project = build_project(client_ssh, cx);
// The release callback owns a clone of the headless handle and drops it when
// the project is released -- presumably so the headless side stays alive for
// as long as the project does, even if the caller discards the returned handle.
project
.update(cx, {
let headless = headless.clone();
|_, cx| cx.on_release(|_, _| drop(headless))
})
.detach();
(project, headless, fs)
}
fn build_project(ssh: Arc<SshSession>, cx: &mut TestAppContext) -> Model<Project> {
cx.update(|cx| {
let settings_store = SettingsStore::test(cx);

View File

@@ -18,7 +18,11 @@ use gpui::{
TextStyle, UpdateGlobal, View, ViewContext, VisualContext, WeakModel, WindowContext,
};
use menu::Confirm;
use project::{search::SearchQuery, search_history::SearchHistoryCursor, Project, ProjectPath};
use project::{
search::{SearchQuery, SearchResult},
search_history::SearchHistoryCursor,
Project, ProjectPath,
};
use settings::Settings;
use smol::stream::StreamExt;
use std::{
@@ -222,7 +226,7 @@ impl ProjectSearch {
let mut limit_reached = false;
while let Some(result) = matches.next().await {
match result {
project::SearchResult::Buffer { buffer, ranges } => {
SearchResult::Buffer { buffer, ranges } => {
let mut match_ranges = this
.update(&mut cx, |this, cx| {
this.excerpts.update(cx, |excerpts, cx| {
@@ -245,7 +249,7 @@ impl ProjectSearch {
}
this.update(&mut cx, |_, cx| cx.notify()).ok()?;
}
project::SearchResult::LimitReached => {
SearchResult::LimitReached => {
limit_reached = true;
}
}

View File

@@ -478,7 +478,7 @@ impl Worktree {
disconnected: false,
};
// Apply updates to a separate snapshto in a background task, then
// Apply updates to a separate snapshot in a background task, then
// send them to a foreground task which updates the model.
cx.background_executor()
.spawn(async move {