Set up a way to run the indexing example with actual embeddings
This commit is contained in:
1
Cargo.lock
generated
1
Cargo.lock
generated
@@ -8617,6 +8617,7 @@ dependencies = [
|
||||
"anyhow",
|
||||
"async-trait",
|
||||
"client",
|
||||
"clock",
|
||||
"collections",
|
||||
"env_logger",
|
||||
"fs",
|
||||
|
||||
@@ -12,6 +12,7 @@ path = "src/semantic_index.rs"
|
||||
anyhow.workspace = true
|
||||
async-trait.workspace = true
|
||||
client.workspace = true
|
||||
clock.workspace = true
|
||||
collections.workspace = true
|
||||
fs.workspace = true
|
||||
futures.workspace = true
|
||||
|
||||
@@ -1,11 +1,13 @@
|
||||
use client::Client;
|
||||
use futures::channel::oneshot;
|
||||
use gpui::{App, Global, TestAppContext};
|
||||
use language::language_settings::AllLanguageSettings;
|
||||
use project::Project;
|
||||
use semantic_index::SemanticIndex;
|
||||
use settings::SettingsStore;
|
||||
use std::path::Path;
|
||||
use std::{path::Path, sync::Arc};
|
||||
use tempfile::tempdir;
|
||||
use util::http::HttpClientWithUrl;
|
||||
|
||||
pub fn init_test(cx: &mut TestAppContext) {
|
||||
_ = cx.update(|cx| {
|
||||
@@ -22,6 +24,8 @@ pub fn init_test(cx: &mut TestAppContext) {
|
||||
fn main() {
|
||||
env_logger::init();
|
||||
|
||||
use clock::FakeSystemClock;
|
||||
|
||||
App::new().run(|cx| {
|
||||
let store = SettingsStore::test(cx);
|
||||
cx.set_global(store);
|
||||
@@ -31,6 +35,12 @@ fn main() {
|
||||
store.update_user_settings::<AllLanguageSettings>(cx, |_| {});
|
||||
});
|
||||
|
||||
let clock = Arc::new(FakeSystemClock::default());
|
||||
let http = Arc::new(HttpClientWithUrl::new("http://localhost:11434"));
|
||||
|
||||
let client = client::Client::new(clock, http.clone(), cx);
|
||||
Client::set_global(client.clone(), cx);
|
||||
|
||||
let temp_dir = tempdir().unwrap();
|
||||
let semantic_index = SemanticIndex::new(temp_dir.path(), cx);
|
||||
|
||||
|
||||
@@ -4,6 +4,7 @@ use std::{cmp, ops::Range, sync::Arc};
|
||||
|
||||
const CHUNK_THRESHOLD: usize = 1500;
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub struct Chunk {
|
||||
pub range: Range<usize>,
|
||||
digest: [u8; 32],
|
||||
|
||||
@@ -14,6 +14,7 @@ pub const EMBEDDING_SIZE_LARGE: usize = 3072;
|
||||
|
||||
// TODO: Check out Voyage
|
||||
|
||||
#[derive(Debug, Clone)]
|
||||
pub enum Embedding {
|
||||
Tiny([f32; EMBEDDING_SIZE_TINY]),
|
||||
Small([f32; EMBEDDING_SIZE_SMALL]),
|
||||
|
||||
@@ -357,7 +357,7 @@ impl WorktreeIndex {
|
||||
.into_iter()
|
||||
.filter_map(|(chunk, embedding)| {
|
||||
embedding
|
||||
.ok()
|
||||
.log_err()
|
||||
.map(|embedding| EmbeddedChunk { chunk, embedding })
|
||||
})
|
||||
.collect::<Vec<EmbeddedChunk>>();
|
||||
@@ -382,7 +382,12 @@ impl WorktreeIndex {
|
||||
embedded_files: channel::Receiver<EmbeddedFile>,
|
||||
cx: &mut AsyncAppContext,
|
||||
) -> Task<()> {
|
||||
todo!()
|
||||
// Let's just log the files for now
|
||||
cx.spawn(|cx| async move {
|
||||
while let Ok(embedded_file) = embedded_files.recv().await {
|
||||
// println!("Embedded file: {:?}", embedded_file.chunks);
|
||||
}
|
||||
})
|
||||
}
|
||||
}
|
||||
|
||||
@@ -399,6 +404,7 @@ struct EmbeddedFile {
|
||||
chunks: Vec<EmbeddedChunk>,
|
||||
}
|
||||
|
||||
#[derive(Debug)]
|
||||
struct EmbeddedChunk {
|
||||
chunk: Chunk,
|
||||
embedding: Embedding,
|
||||
|
||||
Reference in New Issue
Block a user