Compare commits

...

12 Commits

Author SHA1 Message Date
Richard Feldman 3a9e53d561 Add infer_context to settings 2024-07-15 15:04:54 -04:00
Richard Feldman 7ee2511b89 Revert "Add local_model" 2024-07-15 15:04:54 -04:00
    This reverts commit 1ceb6038033336207aff8df7e7bed205d2f83baa.
Richard Feldman 1eb90e0400 Add local_model 2024-07-15 15:04:54 -04:00
Richard Feldman 72e73544bb Try some clarifying prompt text 2024-07-11 13:00:47 -04:00
Richard Feldman 586cf40d63 Sort inferred slash-commands 2024-07-11 12:55:00 -04:00
Richard Feldman 18f2fc9aa3 Cleanup 2024-07-11 11:10:18 -04:00
Richard Feldman b1beed4ca9 Structural summaries 2024-07-11 10:36:18 -04:00
Richard Feldman 8d37c1c6ea Revert "wip 2" 2024-07-09 22:09:36 -04:00
    This reverts commit 0680f6469b.
Richard Feldman 0680f6469b wip 2 2024-07-09 22:09:35 -04:00
Richard Feldman 57a543ebe8 wip 2024-07-09 21:47:02 -04:00
Richard Feldman d69fb469bd Try a split-based approach 2024-07-09 20:52:14 -04:00
Richard Feldman eb413ba404 First pass at splitting summary context 2024-07-09 20:52:11 -04:00
6 changed files with 234 additions and 112 deletions

View File

@@ -397,7 +397,13 @@
// 3. "gpt-4-turbo-preview"
// 4. "gpt-4o"
"default_model": "gpt-4o"
}
},
// Whether to enable the /auto command in the assistant panel, which infers context.
// Enabling this also enables indexing all the files in the project to generate
// metadata used in context inference. The first time a project is indexed, indexing
// can take a long time and use a lot of system resources. Later indexing is incremental
// and much faster.
"infer_context": false
},
// Whether the screen sharing icon is shown in the os status bar.
"show_call_status_icon": true,

View File

@@ -139,7 +139,7 @@ impl LanguageModel {
}
}
#[derive(Serialize, Deserialize, Debug, Eq, PartialEq)]
#[derive(Clone, Serialize, Deserialize, Debug, Eq, PartialEq)]
pub struct LanguageModelRequestMessage {
pub role: Role,
pub content: String,
@@ -160,7 +160,7 @@ impl LanguageModelRequestMessage {
}
}
#[derive(Debug, Default, Serialize)]
#[derive(Clone, Debug, Default, Serialize)]
pub struct LanguageModelRequest {
pub model: LanguageModel,
pub messages: Vec<LanguageModelRequestMessage>,
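These `Clone` derives exist so a request can be duplicated before it is consumed: the /auto command below counts tokens on a clone and only sends the original if it fits the context window. A condensed sketch of that usage, with call shapes taken from the auto_command.rs hunk further down; the assistant crate's `CompletionProvider` and `LanguageModelRequest` are assumed to be in scope:

```rust
use anyhow::Result;
use futures::StreamExt;
use gpui::AsyncAppContext;

// Sketch: count tokens on request.clone(), and only stream a completion if the
// request fits the model's context window. Call shapes mirror the hunk below.
async fn complete_if_it_fits(
    request: LanguageModelRequest,
    cx: &AsyncAppContext,
) -> Result<Option<String>> {
    let model = cx.update(|cx| CompletionProvider::global(cx).model())?;
    let token_count = cx
        .update(|cx| CompletionProvider::global(cx).count_tokens(request.clone(), cx))?
        .await?;
    if token_count >= model.max_token_count() {
        return Ok(None);
    }
    let mut chunks = cx
        .update(|cx| CompletionProvider::global(cx).complete(request))?
        .await?;
    let mut response = String::new();
    while let Some(chunk) = chunks.next().await {
        response.push_str(&chunk?);
    }
    Ok(Some(response))
}
```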

View File

@@ -225,6 +225,7 @@ pub struct AssistantSettings {
pub default_width: Pixels,
pub default_height: Pixels,
pub provider: AssistantProvider,
pub infer_context: bool,
}
/// Assistant panel settings
@@ -282,6 +283,7 @@ impl AssistantSettingsContent {
}
})
},
infer_context: None,
},
}
}
@@ -381,6 +383,7 @@ impl Default for VersionedAssistantSettingsContent {
default_width: None,
default_height: None,
provider: None,
infer_context: None,
})
}
}
@@ -412,6 +415,14 @@ pub struct AssistantSettingsContentV1 {
/// This can either be the internal `zed.dev` service or an external `openai` service,
/// each with their respective default models and configurations.
provider: Option<AssistantProviderContent>,
/// When using the assistant panel, enable the /auto command to automatically
/// infer context. Enabling this also enables background indexing of the project
/// to generate the metadata /auto needs to infer context. The first time a project
/// is indexed, indexing can take a long time and use a lot of system resources;
/// subsequent indexing is incremental and much faster.
///
/// Default: false
infer_context: Option<bool>,
}
#[derive(Clone, Serialize, Deserialize, JsonSchema, Debug)]
@@ -466,6 +477,7 @@ impl Settings for AssistantSettings {
&mut settings.default_height,
value.default_height.map(Into::into),
);
merge(&mut settings.infer_context, value.infer_context);
if let Some(provider) = value.provider.clone() {
match (&mut settings.provider, provider) {
(
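The `merge(&mut settings.infer_context, value.infer_context)` call above relies on a small helper defined elsewhere in this settings module and not shown in the hunk. Its assumed shape, for readers without the full file (an assumption, not text from this diff):

```rust
// Assumed shape of the `merge` helper used above: overwrite the resolved
// setting only when the user-supplied value is actually present.
fn merge<T>(target: &mut T, value: Option<T>) {
    if let Some(value) = value {
        *target = value;
    }
}
```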

View File

@@ -3,8 +3,9 @@ use super::{SlashCommand, SlashCommandOutput};
use crate::{CompletionProvider, LanguageModelRequest, LanguageModelRequestMessage, Role};
use anyhow::{anyhow, Result};
use futures::StreamExt;
use gpui::{AppContext, Task, WeakView};
use gpui::{AppContext, AsyncAppContext, Task, WeakView};
use language::{CodeLabel, LspAdapterDelegate};
use serde::{Deserialize, Serialize};
use std::sync::{atomic::AtomicBool, Arc};
use ui::WindowContext;
use workspace::Workspace;
@@ -54,140 +55,188 @@ impl SlashCommand for AutoCommand {
return Task::ready(Err(anyhow!("missing prompt")));
};
let prompt = format!("{PROMPT_INSTRUCTIONS_BEFORE_SUMMARY}\n{SUMMARY}\n{PROMPT_INSTRUCTIONS_AFTER_SUMMARY}\n{argument}");
let request = LanguageModelRequest {
model: CompletionProvider::global(cx).model(),
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: prompt,
}],
stop: vec![],
temperature: 1.0,
};
// to_string() is needed so it can live long enough to be used in cx.spawn
let original_prompt = argument.to_string();
let task = cx.spawn(|cx: gpui::AsyncWindowContext| async move {
let summaries: Vec<FileSummary> = serde_json::from_str(SUMMARY).unwrap_or_else(|_| {
// Since we generate the JSON ourselves, this parsing should never fail. If it does, that's a bug.
log::error!("JSON parsing of project file summaries failed");
let stream = CompletionProvider::global(cx).complete(request);
let mut wip_action: String = String::new();
let task: Task<Result<String>> = cx.spawn(|_cx| async move {
let mut actions_text = String::new();
let stream_completion = async {
let mut messages = stream.await?;
// Handle this gracefully by not including any summaries. Assistant results
// will be worse than if we actually had summaries, but we won't block the user.
Vec::new()
});
while let Some(message) = messages.next().await {
let text = message?;
chunked_line(&mut wip_action, &text, |line| {
actions_text.push('/');
actions_text.push_str(line);
actions_text.push('\n');
});
smol::future::yield_now().await;
}
anyhow::Ok(())
};
stream_completion.await?;
Ok(actions_text)
commands_for_summaries(&summaries, &original_prompt, &cx).await
});
// As a convenience, append /auto's argument to the end of the prompt so you don't have to write it again.
let argument = argument.to_string();
// As a convenience, append /auto's argument to the end of the prompt
// so you don't have to write it again.
let original_prompt = argument.to_string();
cx.background_executor().spawn(async move {
let mut text = task.await?;
let commands = task.await?;
let mut prompt = String::new();
text.push_str(&argument);
log::info!(
"Translating this response into slash-commands: {:?}",
commands
);
for command in commands {
prompt.push('/');
prompt.push_str(&command.name);
prompt.push(' ');
prompt.push_str(&command.arg);
prompt.push('\n');
}
prompt.push('\n');
prompt.push_str(&original_prompt);
Ok(SlashCommandOutput {
text,
text: prompt,
sections: Vec::new(),
run_commands_in_text: true,
})
})
}
}
const PROMPT_INSTRUCTIONS_BEFORE_SUMMARY: &str = r#"
I'm going to give you a prompt. I don't want you to respond
to the prompt itself. I want you to figure out which of the following
actions on my project, if any, would help you answer the prompt.
Here are the actions:
const PROMPT_INSTRUCTIONS_BEFORE_SUMMARY: &str = include_str!("prompt_before_summary.txt");
const PROMPT_INSTRUCTIONS_AFTER_SUMMARY: &str = include_str!("prompt_after_summary.txt");
const SUMMARY: &str = include_str!("/Users/rtfeldman/code/summarize-dir/combined_summaries.json");
## file
#[derive(Serialize, Deserialize)]
struct FileSummary {
filename: String,
summary: String,
}
This action's parameter is a file path to one of the files
in the project. If you ask for this action, I will tell you
the full contents of the file, so you can learn all the
details of the file.
fn summaries_prompt(summaries: &[FileSummary], original_prompt: &str) -> String {
let json_summaries = serde_json::to_string(summaries).unwrap();
## search
format!("{PROMPT_INSTRUCTIONS_BEFORE_SUMMARY}\n{json_summaries}\n{PROMPT_INSTRUCTIONS_AFTER_SUMMARY}\n{original_prompt}")
}
This action's parameter is a string to search for across
the project. It will tell you which files this string
(or similar strings; it is a semantic search) appear in,
as well as some context of the lines surrounding each result.
/// The slash commands that the model is told about, and which we look for in the inference response.
const SUPPORTED_SLASH_COMMANDS: &[&str] = &["search", "file"];
---
#[derive(Debug, Clone)]
struct CommandToRun {
name: String,
arg: String,
}
That was the end of the list of actions.
/// Given the pre-indexed file summaries for this project, as well as the original prompt
/// string passed to `/auto`, get a list of slash commands to run, along with their arguments.
///
/// The model's output does not include the slashes (to reduce the chance that it makes a mistake),
/// so taking one of these returned Strings and turning it into a real slash-command-with-argument
/// involves prepending a slash to it.
///
/// This function will validate that each of the returned lines begins with one of SUPPORTED_SLASH_COMMANDS.
/// Any other lines it encounters will be discarded, with a warning logged.
async fn commands_for_summaries(
summaries: &[FileSummary],
original_prompt: &str,
cx: &AsyncAppContext,
) -> Result<Vec<CommandToRun>> {
if summaries.is_empty() {
return Ok(Vec::new());
}
Here is an XML summary of each of the files in my project:
"#;
let model = cx.update(|cx| CompletionProvider::global(cx).model())?;
let max_token_count = model.max_token_count();
const PROMPT_INSTRUCTIONS_AFTER_SUMMARY: &str = r#"
Actions have a cost, so only include actions that you think
will be helpful to you in doing a great job answering the
prompt in the future.
// Rather than recursing (which would require this async function to use a pinned box),
// we use an explicit stack of arguments and answers for when we need to "recurse."
let mut stack = vec![(summaries, String::new())];
let mut final_response = Vec::new();
You must respond ONLY with a list of actions you would like to
perform. Each action should be on its own line, and followed by a space and then its parameter.
Actions can be performed more than once with different parameters.
Here is an example valid response:
```
file path/to/my/file.txt
file path/to/another/file.txt
search something to search for
search something else to search for
```
Once again, do not forget: you must respond ONLY in the format of
one action per line, and the action name should be followed by
its parameter. Your response must not include anything other
than a list of actions, with one action per line, in this format.
It is extremely important that you do not deviate from this format even slightly!
This is the end of my instructions for how to respond. The rest is the prompt:
"#;
const SUMMARY: &str = "";
fn chunked_line(wip: &mut String, chunk: &str, mut on_line_end: impl FnMut(&str)) {
// The first iteration of the loop should just push to wip
// and nothing else. We only push what we encountered in
// previous iterations of the loop.
//
// This correctly handles both the scenario where no
// newlines are encountered (the loop will only run once,
// and so will only push to wip), as well as the scenario
// where the chunk contains at least one newline but
// does not end in a newline (the last iteration of the
// loop will update wip but will not run anything).
let mut is_first_iteration = true;
for line in chunk.split('\n') {
if is_first_iteration {
is_first_iteration = false;
} else {
// Since this isn't the first iteration of the loop, we definitely hit a newline
// at the end of the previous iteration! Run the function on whatever wip we have.
on_line_end(wip);
wip.clear();
while let Some((current_summaries, mut accumulated_response)) = stack.pop() {
// The split can result in one slice being empty and the other having one element.
// Whenever that happens, skip the empty one.
if current_summaries.is_empty() {
continue;
}
wip.push_str(line);
log::info!(
"Inferring prompt context using {} file summaries",
current_summaries.len()
);
let request = LanguageModelRequest {
model: model.clone(),
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: summaries_prompt(&current_summaries, original_prompt),
}],
stop: Vec::new(),
temperature: 1.0,
};
let token_count = cx
.update(|cx| CompletionProvider::global(cx).count_tokens(request.clone(), cx))?
.await?;
if token_count < max_token_count {
let mut response_chunks = cx
.update(|cx| CompletionProvider::global(cx).complete(request))?
.await?;
while let Some(chunk) = response_chunks.next().await {
accumulated_response.push_str(&chunk?);
}
for line in accumulated_response.split('\n') {
if let Some(first_space) = line.find(' ') {
let command = &line[..first_space].trim();
let arg = &line[first_space..].trim();
// Don't return empty or duplicate commands
if !command.is_empty()
&& !final_response
.iter()
.any(|cmd: &CommandToRun| cmd.name == *command && cmd.arg == *arg)
{
if SUPPORTED_SLASH_COMMANDS
.iter()
.any(|supported| command == supported)
{
final_response.push(CommandToRun {
name: command.to_string(),
arg: arg.to_string(),
});
} else {
log::warn!(
"Context inference returned an unrecognized slash-commend line: {:?}",
line
);
}
}
} else if !line.trim().is_empty() {
// All slash-commands currently supported in context inference need a space for the argument.
log::warn!(
"Context inference returned a non-blank line that contained no spaces (meaning no argument for the slash-command): {:?}",
line
);
}
}
} else if current_summaries.len() == 1 {
log::warn!("Inferring context for a single file's summary failed because the prompt's token length exceeded the model's token limit.");
} else {
log::info!(
"Context inference using file summaries resulted in a prompt containing {token_count} tokens, which exceeded the model's max of {max_token_count}. Retrying as two separate prompts, each including half the number of summaries.",
);
let (left, right) = current_summaries.split_at(current_summaries.len() / 2);
stack.push((right, accumulated_response.clone()));
stack.push((left, accumulated_response));
}
}
// Sort the commands by name (reversed just so that /search appears before /file)
final_response.sort_by(|cmd1, cmd2| cmd1.name.cmp(&cmd2.name).reverse());
Ok(final_response)
}
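The heart of this file is the retry strategy from the "Try a split-based approach" commit: when the prompt built from the current batch of file summaries exceeds the model's token budget, the batch is split in half and each half is retried, with an explicit stack standing in for recursion (recursing in an async fn would require boxing the future). Below is a stripped-down, synchronous sketch of just that control flow; `run_in_fitting_batches`, `estimate_tokens`, and `send` are stand-ins for illustration, not items from this diff:

```rust
/// Sketch of the halving strategy used by `commands_for_summaries`: an explicit
/// stack replaces recursion, and any batch whose prompt would be too large is
/// split into two halves and pushed back for retry.
fn run_in_fitting_batches<T>(
    items: &[T],
    max_tokens: usize,
    estimate_tokens: impl Fn(&[T]) -> usize,
    mut send: impl FnMut(&[T]) -> String,
) -> Vec<String> {
    let mut stack = vec![items];
    let mut responses = Vec::new();

    while let Some(batch) = stack.pop() {
        // Splitting can produce an empty half; skip it.
        if batch.is_empty() {
            continue;
        }
        if estimate_tokens(batch) < max_tokens {
            responses.push(send(batch));
        } else if batch.len() == 1 {
            // A single item that still doesn't fit can't be split any further.
            eprintln!("one item exceeds the token limit; skipping it");
        } else {
            // Too big: retry as two halves. Pushing the right half first keeps
            // the left half on top of the stack, so order is roughly preserved.
            let (left, right) = batch.split_at(batch.len() / 2);
            stack.push(right);
            stack.push(left);
        }
    }
    responses
}
```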

View File

@@ -0,0 +1,24 @@
Actions have a cost, so only include actions that you think
will be helpful to you in doing a great job answering the
prompt in the future.

You must respond ONLY with a list of actions you would like to
perform. Each action should be on its own line, and followed by a space and then its parameter.
Actions can be performed more than once with different parameters.

Here is an example valid response:

```
file path/to/my/file.txt
file path/to/another/file.txt
search something to search for
search something else to search for
```

Once again, do not forget: you must respond ONLY in the format of
one action per line, and the action name should be followed by
its parameter. Your response must not include anything other
than a list of actions, with one action per line, in this format.
It is extremely important that you do not deviate from this format even slightly!

This is the end of my instructions for how to respond. The rest is the prompt:
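This file (prompt_after_summary.txt) exists to pin the model to a rigid "action parameter" line format, which is what lets `commands_for_summaries` get away with plain string splitting. A self-contained sketch of that parsing step, mirroring the validation in auto_command.rs (split each line at the first space, check the action against an allow-list, drop malformed or duplicate lines); `parse_actions` is an illustrative name, not a function in this diff:

```rust
const SUPPORTED_ACTIONS: &[&str] = &["search", "file"];

/// Parse a response in the "action parameter" line format described above into
/// (action, parameter) pairs, discarding unsupported, malformed, or duplicate lines.
fn parse_actions(response: &str) -> Vec<(String, String)> {
    let mut actions: Vec<(String, String)> = Vec::new();
    for line in response.lines() {
        let Some(first_space) = line.find(' ') else {
            if !line.trim().is_empty() {
                eprintln!("line has no parameter, skipping: {line:?}");
            }
            continue;
        };
        let name = line[..first_space].trim();
        let arg = line[first_space..].trim();
        if !SUPPORTED_ACTIONS.contains(&name) {
            eprintln!("unrecognized action, skipping: {line:?}");
            continue;
        }
        if arg.is_empty() || actions.iter().any(|(n, a)| n == name && a == arg) {
            continue;
        }
        actions.push((name.to_string(), arg.to_string()));
    }
    actions
}

fn main() {
    let response = "file path/to/my/file.txt\nsearch something to search for\n";
    assert_eq!(parse_actions(response).len(), 2);
}
```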

View File

@@ -0,0 +1,31 @@
I'm going to give you a prompt. I don't want you to respond
to the prompt itself. I want you to figure out which of the following
actions on my project, if any, would help you answer the prompt.

Here are the actions:

## file

This action's parameter is a file path to one of the files
in the project. If you ask for this action, I will tell you
the full contents of the file, so you can learn all the
details of the file.

## search

This action's parameter is a string to do a semantic search for
across the files in the project. (You will have a JSON summary
of all the files in the project.) It will tell you which files this string
(or similar strings; it is a semantic search) appears in,
as well as some context of the lines surrounding each result.

It's very important that you only use this action when you think
that searching across the specific files in this project for the query
in question will be useful. For example, don't use this command to search
for queries you might put into a general Web search engine, because those
will be too general to give useful results in this project-specific search.

---

That was the end of the list of actions.

Here is a JSON summary of each of the files in my project:
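The JSON summary this prompt refers to is just a serialized list of `FileSummary { filename, summary }` records, spliced between the two prompt files the same way `summaries_prompt` does in auto_command.rs. A minimal, self-contained sketch of that assembly (serde and serde_json are assumed as dependencies, the instruction texts are taken as parameters instead of include_str!, and the example data is made up):

```rust
use serde::{Deserialize, Serialize};

#[derive(Serialize, Deserialize)]
struct FileSummary {
    filename: String,
    summary: String,
}

/// Loosely mirrors `summaries_prompt` from auto_command.rs: instructions before
/// the summary, then the JSON summaries, then the instructions after the
/// summary, then the user's original /auto argument.
fn summaries_prompt(
    before: &str,
    after: &str,
    summaries: &[FileSummary],
    original_prompt: &str,
) -> String {
    let json_summaries = serde_json::to_string(summaries).unwrap();
    format!("{before}\n{json_summaries}\n{after}\n{original_prompt}")
}

fn main() {
    // Made-up example data; in the real command the summaries come from the
    // project index and the surrounding text from the two prompt files above.
    let summaries = vec![FileSummary {
        filename: "src/lib.rs".into(),
        summary: "Defines the crate's public API.".into(),
    }];
    let prompt = summaries_prompt(
        "instructions before the summary go here",
        "instructions after the summary go here",
        &summaries,
        "how is the public API structured?",
    );
    println!("{prompt}");
}
```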