Set up a way to run the indexing example with actual embeddings

This commit is contained in:
Kyle Kelley
2024-04-04 12:10:01 -07:00
parent 876d017294
commit f80ac2c190
6 changed files with 23 additions and 3 deletions

1
Cargo.lock generated
View File

@@ -8617,6 +8617,7 @@ dependencies = [
"anyhow",
"async-trait",
"client",
"clock",
"collections",
"env_logger",
"fs",

View File

@@ -12,6 +12,7 @@ path = "src/semantic_index.rs"
anyhow.workspace = true
async-trait.workspace = true
client.workspace = true
clock.workspace = true
collections.workspace = true
fs.workspace = true
futures.workspace = true

View File

@@ -1,11 +1,13 @@
use client::Client;
use futures::channel::oneshot;
use gpui::{App, Global, TestAppContext};
use language::language_settings::AllLanguageSettings;
use project::Project;
use semantic_index::SemanticIndex;
use settings::SettingsStore;
use std::path::Path;
use std::{path::Path, sync::Arc};
use tempfile::tempdir;
use util::http::HttpClientWithUrl;
pub fn init_test(cx: &mut TestAppContext) {
_ = cx.update(|cx| {
@@ -22,6 +24,8 @@ pub fn init_test(cx: &mut TestAppContext) {
fn main() {
env_logger::init();
use clock::FakeSystemClock;
App::new().run(|cx| {
let store = SettingsStore::test(cx);
cx.set_global(store);
@@ -31,6 +35,12 @@ fn main() {
store.update_user_settings::<AllLanguageSettings>(cx, |_| {});
});
let clock = Arc::new(FakeSystemClock::default());
let http = Arc::new(HttpClientWithUrl::new("http://localhost:11434"));
let client = client::Client::new(clock, http.clone(), cx);
Client::set_global(client.clone(), cx);
let temp_dir = tempdir().unwrap();
let semantic_index = SemanticIndex::new(temp_dir.path(), cx);

View File

@@ -4,6 +4,7 @@ use std::{cmp, ops::Range, sync::Arc};
const CHUNK_THRESHOLD: usize = 1500;
#[derive(Debug, Clone)]
pub struct Chunk {
pub range: Range<usize>,
digest: [u8; 32],

View File

@@ -14,6 +14,7 @@ pub const EMBEDDING_SIZE_LARGE: usize = 3072;
// TODO: Check out Voyage
#[derive(Debug, Clone)]
pub enum Embedding {
Tiny([f32; EMBEDDING_SIZE_TINY]),
Small([f32; EMBEDDING_SIZE_SMALL]),

View File

@@ -357,7 +357,7 @@ impl WorktreeIndex {
.into_iter()
.filter_map(|(chunk, embedding)| {
embedding
.ok()
.log_err()
.map(|embedding| EmbeddedChunk { chunk, embedding })
})
.collect::<Vec<EmbeddedChunk>>();
@@ -382,7 +382,12 @@ impl WorktreeIndex {
embedded_files: channel::Receiver<EmbeddedFile>,
cx: &mut AsyncAppContext,
) -> Task<()> {
todo!()
// For now, just drain the channel of embedded files; the logging below is commented out
cx.spawn(|cx| async move {
while let Ok(embedded_file) = embedded_files.recv().await {
// println!("Embedded file: {:?}", embedded_file.chunks);
}
})
}
}
@@ -399,6 +404,7 @@ struct EmbeddedFile {
chunks: Vec<EmbeddedChunk>,
}
#[derive(Debug)]
struct EmbeddedChunk {
chunk: Chunk,
embedding: Embedding,