Compare commits

...

6 Commits

Author SHA1 Message Date
Joseph T Lyons
0c74f4e0ca v0.151.x stable 2024-09-04 10:30:20 -04:00
Kirill Bulatov
7c0ad81a3d Improve outline panel performance (#17183)
Part of https://github.com/zed-industries/zed/issues/14235

* moved search result highlight calculation onto a background thread,
with a highlight-less representation as a fallback
* show only part of the line per search result, stop merging results into
a single line where possible, and always trim leading whitespace
* highlight results in batches
* better cache all rendering-related search result data
* add test infrastructure and fix folding-related issues
* improve entry display when a multi buffer contains a buffer search (as
the find-all-references one does)
* fix cloud notes not showing search matches

Release Notes:

- Improved outline panel performance
2024-09-02 01:51:47 +03:00
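The bullets above describe the approach rather than showing it. The sketch below is a self-contained, simplified illustration of the core idea: compute match highlights off the main thread and deliver them in batches, rendering un-highlighted entries as the fallback until a batch arrives. It uses plain `std::thread` and channels rather than Zed's gpui executors, and every name in it is invented for the example.

```rust
use std::ops::Range;
use std::sync::mpsc;
use std::thread;

const BATCH_SIZE: usize = 256;

fn main() {
    // Stand-in for the matched lines a project search produced.
    let results: Vec<String> = (0..10_000)
        .map(|i| format!("    fn item_{i}() {{ /* needle */ }}"))
        .collect();
    let query = "needle";

    // Highlights are computed off the main thread and delivered in batches;
    // until a batch arrives, entries render without highlights as a fallback.
    let (tx, rx) = mpsc::channel::<Vec<(usize, Range<usize>)>>();

    let worker = thread::spawn(move || {
        let mut batch = Vec::with_capacity(BATCH_SIZE);
        for (index, line) in results.iter().enumerate() {
            // Trim leading whitespace before locating the match, mirroring
            // the "always trim leading whitespace" bullet above.
            let trimmed_start = line.len() - line.trim_start().len();
            if let Some(offset) = line[trimmed_start..].find(query) {
                let start = trimmed_start + offset;
                batch.push((index, start..start + query.len()));
            }
            if batch.len() == BATCH_SIZE {
                let _ = tx.send(std::mem::take(&mut batch));
            }
        }
        if !batch.is_empty() {
            let _ = tx.send(batch);
        }
    });

    // The "UI" side applies each batch as it arrives.
    let mut highlighted = 0;
    for batch in rx {
        highlighted += batch.len();
    }
    worker.join().unwrap();
    println!("highlighted {highlighted} result lines");
}
```

Batching keeps the UI side from handling one update per match, which is the same trade-off the commit describes.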
Peter Tripp
cd0841c590 zed 0.151.1 2024-08-30 09:01:14 -04:00
Peter Tripp
f44f5c5bc7 Ollama max_tokens settings (#17025)
- Support `available_models` for Ollama.
- Clamp default max tokens (context length) to 16384.
- Add documentation for Ollama context configuration.
2024-08-30 09:00:03 -04:00
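As a rough sketch of what "Support `available_models` for Ollama" means in practice: models reported by the Ollama API are merged with models declared in settings, keyed by name so the settings entry wins. The types below are simplified stand-ins, not the provider code shown further down in this diff.

```rust
use std::collections::BTreeMap;

// Simplified stand-ins for the real types; the structs in the Zed crates
// carry more fields (keep_alive, provider metadata, and so on).
#[derive(Clone, Debug)]
struct Model {
    name: String,
    display_name: Option<String>,
    max_tokens: usize,
}

fn merge_models(from_api: Vec<Model>, from_settings: Vec<Model>) -> Vec<Model> {
    // Key by model name so a settings entry overrides the API-discovered one,
    // and the resulting list comes back sorted by name.
    let mut models: BTreeMap<String, Model> = BTreeMap::new();
    for model in from_api {
        models.insert(model.name.clone(), model);
    }
    for model in from_settings {
        models.insert(model.name.clone(), model);
    }
    models.into_values().collect()
}

fn main() {
    let api = vec![Model {
        name: "mistral:latest".into(),
        display_name: None,
        max_tokens: 16_384,
    }];
    let settings = vec![Model {
        name: "mistral:latest".into(),
        display_name: Some("Mistral 32k".into()),
        max_tokens: 32_768,
    }];

    let merged = merge_models(api, settings);
    assert_eq!(merged.len(), 1);
    assert_eq!(merged[0].max_tokens, 32_768);
    println!("{merged:?}");
}
```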
CharlesChen0823
670b3b9382 file_finder: Fix crash in new_path_prompt (#16991)
Closes #16919 

Repro steps:
  - add two worktrees
  - set `"use_system_path_prompts": false` in settings
  - press `ctrl-n` to create a new file and type any characters
  - press `ctrl-s` to save the file
  - type any character: the editor crashes

Release Notes:

- Fixed a crash when `"use_system_path_prompts": false` is set, or in a
remote project with two or more worktrees.
2024-08-30 08:59:50 -04:00
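For context on the fix (shown later in this diff, where the highlight text is built from the worktree prefix joined with the relative path), here is a minimal standalone illustration with hypothetical values: fuzzy-match positions index into the full path, so highlighting must run against that same joined string or the ranges can fall out of bounds.

```rust
use std::path::PathBuf;

fn main() {
    // Hypothetical match data: fuzzy-match positions index into the *full*
    // path (worktree prefix + relative path), so the text being highlighted
    // must be built the same way or the ranges can fall out of bounds.
    let path_prefix = "second-worktree";
    let relative_path = PathBuf::from("src/main.rs");

    let whole_path = PathBuf::from(path_prefix).join(&relative_path);
    let text = whole_path.to_string_lossy();

    let positions = [0usize, 1, 16, 24];
    for position in positions {
        // Against `relative_path` alone (11 bytes) the later positions would
        // be out of range; against the joined path they are valid.
        assert!(position < text.len(), "highlight position out of bounds");
    }
    println!("highlighting {text}");
}
```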
Joseph T Lyons
75da9af3e6 v0.151.x preview 2024-08-28 11:43:35 -04:00
14 changed files with 1858 additions and 707 deletions

Cargo.lock (generated, 582 changed lines)

File diff suppressed because it is too large.

View File

@@ -135,6 +135,7 @@ impl AssistantSettingsContent {
Some(language_model::settings::OllamaSettingsContent {
api_url,
low_speed_timeout_in_seconds,
available_models: None,
});
}
},
@@ -295,7 +296,7 @@ impl AssistantSettingsContent {
_ => (None, None),
};
settings.provider = Some(AssistantProviderContentV1::Ollama {
default_model: Some(ollama::Model::new(&model)),
default_model: Some(ollama::Model::new(&model, None, None)),
api_url,
low_speed_timeout_in_seconds,
});

View File

@@ -107,8 +107,10 @@ impl Match {
if let Some(path_match) = &self.path_match {
text.push_str(&path_match.path.to_string_lossy());
let mut whole_path = PathBuf::from(path_match.path_prefix.to_string());
whole_path = whole_path.join(path_match.path.clone());
for (range, style) in highlight_ranges(
&path_match.path.to_string_lossy(),
&whole_path.to_string_lossy(),
&path_match.positions,
gpui::HighlightStyle::color(Color::Accent.color(cx)),
) {

View File

@@ -6,8 +6,10 @@ use ollama::{
get_models, preload_model, stream_chat_completion, ChatMessage, ChatOptions, ChatRequest,
ChatResponseDelta, OllamaToolCall,
};
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
use settings::{Settings, SettingsStore};
use std::{sync::Arc, time::Duration};
use std::{collections::BTreeMap, sync::Arc, time::Duration};
use ui::{prelude::*, ButtonLike, Indicator};
use util::ResultExt;
@@ -28,6 +30,17 @@ const PROVIDER_NAME: &str = "Ollama";
pub struct OllamaSettings {
pub api_url: String,
pub low_speed_timeout: Option<Duration>,
pub available_models: Vec<AvailableModel>,
}
#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
pub struct AvailableModel {
/// The model name in the Ollama API (e.g. "llama3.1:latest")
pub name: String,
/// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
pub display_name: Option<String>,
/// The Context Length parameter to the model (aka num_ctx or n_ctx)
pub max_tokens: usize,
}
pub struct OllamaLanguageModelProvider {
@@ -61,7 +74,7 @@ impl State {
// indicating which models are embedding models,
// simply filter out models with "-embed" in their name
.filter(|model| !model.name.contains("-embed"))
.map(|model| ollama::Model::new(&model.name))
.map(|model| ollama::Model::new(&model.name, None, None))
.collect();
models.sort_by(|a, b| a.name.cmp(&b.name));
@@ -123,10 +136,32 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
}
fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
self.state
.read(cx)
let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
// Add models from the Ollama API
for model in self.state.read(cx).available_models.iter() {
models.insert(model.name.clone(), model.clone());
}
// Override with available models from settings
for model in AllLanguageModelSettings::get_global(cx)
.ollama
.available_models
.iter()
{
models.insert(
model.name.clone(),
ollama::Model {
name: model.name.clone(),
display_name: model.display_name.clone(),
max_tokens: model.max_tokens,
keep_alive: None,
},
);
}
models
.into_values()
.map(|model| {
Arc::new(OllamaLanguageModel {
id: LanguageModelId::from(model.name.clone()),

View File

@@ -152,6 +152,7 @@ pub struct AnthropicSettingsContentV1 {
pub struct OllamaSettingsContent {
pub api_url: Option<String>,
pub low_speed_timeout_in_seconds: Option<u64>,
pub available_models: Option<Vec<provider::ollama::AvailableModel>>,
}
#[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
@@ -276,6 +277,9 @@ impl settings::Settings for AllLanguageModelSettings {
anthropic.as_ref().and_then(|s| s.available_models.clone()),
);
// Ollama
let ollama = value.ollama.clone();
merge(
&mut settings.ollama.api_url,
value.ollama.as_ref().and_then(|s| s.api_url.clone()),
@@ -288,6 +292,10 @@ impl settings::Settings for AllLanguageModelSettings {
settings.ollama.low_speed_timeout =
Some(Duration::from_secs(low_speed_timeout_in_seconds));
}
merge(
&mut settings.ollama.available_models,
ollama.as_ref().and_then(|s| s.available_models.clone()),
);
// OpenAI
let (openai, upgraded) = match value.openai.clone().map(|s| s.upgrade()) {

View File

@@ -66,40 +66,37 @@ impl Default for KeepAlive {
#[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
pub struct Model {
pub name: String,
pub display_name: Option<String>,
pub max_tokens: usize,
pub keep_alive: Option<KeepAlive>,
}
// This could be dynamically retrieved via the API (1 call per model)
// curl -s http://localhost:11434/api/show -d '{"model": "llama3.1:latest"}' | jq '.model_info."llama.context_length"'
fn get_max_tokens(name: &str) -> usize {
match name {
"dolphin-llama3:8b-256k" => 262144, // 256K
_ => match name.split(':').next().unwrap() {
"mistral-nemo" => 1024000, // 1M
"deepseek-coder-v2" => 163840, // 160K
"llama3.1" | "phi3" | "command-r" | "command-r-plus" => 131072, // 128K
"codeqwen" => 65536, // 64K
"mistral" | "mistral-large" | "dolphin-mistral" | "codestral" // 32K
| "mistral-openorca" | "dolphin-mixtral" | "mixstral" | "llava"
| "qwen" | "qwen2" | "wizardlm2" | "wizard-math" => 32768,
"codellama" | "stable-code" | "deepseek-coder" | "starcoder2" // 16K
| "wizardcoder" => 16384,
"llama3" | "gemma2" | "gemma" | "codegemma" | "dolphin-llama3" // 8K
| "llava-llama3" | "starcoder" | "openchat" | "aya" => 8192,
"llama2" | "yi" | "llama2-chinese" | "vicuna" | "nous-hermes2" // 4K
| "stablelm2" => 4096,
"phi" | "orca-mini" | "tinyllama" | "granite-code" => 2048, // 2K
_ => 2048, // 2K (default)
},
/// Default context length for unknown models.
const DEFAULT_TOKENS: usize = 2048;
/// Magic number. Lets many Ollama models work with ~16GB of ram.
const MAXIMUM_TOKENS: usize = 16384;
match name.split(':').next().unwrap() {
"phi" | "tinyllama" | "granite-code" => 2048,
"llama2" | "yi" | "vicuna" | "stablelm2" => 4096,
"llama3" | "gemma2" | "gemma" | "codegemma" | "starcoder" | "aya" => 8192,
"codellama" | "starcoder2" => 16384,
"mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "dolphin-mixtral" => 32768,
"llama3.1" | "phi3" | "phi3.5" | "command-r" | "deepseek-coder-v2" => 128000,
_ => DEFAULT_TOKENS,
}
.clamp(1, MAXIMUM_TOKENS)
}
impl Model {
pub fn new(name: &str) -> Self {
pub fn new(name: &str, display_name: Option<&str>, max_tokens: Option<usize>) -> Self {
Self {
name: name.to_owned(),
max_tokens: get_max_tokens(name),
display_name: display_name
.map(ToString::to_string)
.or_else(|| name.strip_suffix(":latest").map(ToString::to_string)),
max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
keep_alive: Some(KeepAlive::indefinite()),
}
}
@@ -109,7 +106,7 @@ impl Model {
}
pub fn display_name(&self) -> &str {
&self.name
self.display_name.as_ref().unwrap_or(&self.name)
}
pub fn max_token_count(&self) -> usize {

View File

@@ -30,10 +30,15 @@ search.workspace = true
serde.workspace = true
serde_json.workspace = true
settings.workspace = true
smol.workspace = true
theme.workspace = true
util.workspace = true
worktree.workspace = true
workspace.workspace = true
[dev-dependencies]
search = { workspace = true, features = ["test-support"] }
pretty_assertions.workspace = true
[package.metadata.cargo-machete]
ignored = ["log"]

File diff suppressed because it is too large.

View File

@@ -5,6 +5,14 @@ edition = "2021"
publish = false
license = "GPL-3.0-or-later"
[features]
test-support = [
"client/test-support",
"editor/test-support",
"gpui/test-support",
"workspace/test-support",
]
[lints]
workspace = true

View File

@@ -113,7 +113,7 @@ pub fn init(cx: &mut AppContext) {
.detach();
}
struct ProjectSearch {
pub struct ProjectSearch {
project: Model<Project>,
excerpts: Model<MultiBuffer>,
pending_search: Option<Task<Option<()>>>,
@@ -151,7 +151,7 @@ pub struct ProjectSearchView {
}
#[derive(Debug, Clone)]
struct ProjectSearchSettings {
pub struct ProjectSearchSettings {
search_options: SearchOptions,
filters_enabled: bool,
}
@@ -162,7 +162,7 @@ pub struct ProjectSearchBar {
}
impl ProjectSearch {
fn new(project: Model<Project>, cx: &mut ModelContext<Self>) -> Self {
pub fn new(project: Model<Project>, cx: &mut ModelContext<Self>) -> Self {
let replica_id = project.read(cx).replica_id();
let capability = project.read(cx).capability();
@@ -605,7 +605,7 @@ impl ProjectSearchView {
});
}
fn new(
pub fn new(
model: Model<ProjectSearch>,
cx: &mut ViewContext<Self>,
settings: Option<ProjectSearchSettings>,
@@ -751,9 +751,9 @@ impl ProjectSearchView {
});
}
// Re-activate the most recently activated search in this pane or the most recent if it has been closed.
// If no search exists in the workspace, create a new one.
fn deploy_search(
/// Re-activate the most recently activated search in this pane or the most recent if it has been closed.
/// If no search exists in the workspace, create a new one.
pub fn deploy_search(
workspace: &mut Workspace,
action: &workspace::DeploySearch,
cx: &mut ViewContext<Workspace>,
@@ -1140,6 +1140,11 @@ impl ProjectSearchView {
return self.focus_results_editor(cx);
}
}
#[cfg(any(test, feature = "test-support"))]
pub fn results_editor(&self) -> &View<Editor> {
&self.results_editor
}
}
impl ProjectSearchBar {
@@ -1752,15 +1757,31 @@ fn register_workspace_action_for_present_search<A: Action>(
});
}
#[cfg(any(test, feature = "test-support"))]
pub fn perform_project_search(
search_view: &View<ProjectSearchView>,
text: impl Into<std::sync::Arc<str>>,
cx: &mut gpui::VisualTestContext,
) {
search_view.update(cx, |search_view, cx| {
search_view
.query_editor
.update(cx, |query_editor, cx| query_editor.set_text(text, cx));
search_view.search(cx);
});
cx.run_until_parked();
}
#[cfg(test)]
pub mod tests {
use std::sync::Arc;
use super::*;
use editor::{display_map::DisplayRow, DisplayPoint};
use gpui::{Action, TestAppContext, WindowHandle};
use project::FakeFs;
use serde_json::json;
use settings::SettingsStore;
use std::sync::Arc;
use workspace::DeploySearch;
#[gpui::test]

View File

@@ -117,7 +117,7 @@ pub struct RevealInProjectPanel {
pub entry_id: Option<u64>,
}
#[derive(PartialEq, Clone, Deserialize)]
#[derive(Default, PartialEq, Clone, Deserialize)]
pub struct DeploySearch {
#[serde(default)]
pub replace_enabled: bool,

View File

@@ -2,7 +2,7 @@
description = "The fast, collaborative code editor."
edition = "2021"
name = "zed"
version = "0.151.0"
version = "0.151.1"
publish = false
license = "GPL-3.0-or-later"
authors = ["Zed Team <hi@zed.dev>"]

View File

@@ -1 +1 @@
dev
stable

View File

@@ -108,33 +108,49 @@ Custom models will be listed in the model dropdown in the assistant panel.
Download and install Ollama from [ollama.com/download](https://ollama.com/download) (Linux or macOS) and ensure it's running with `ollama --version`.
You can use Ollama with the Zed assistant by making Ollama appear as an OpenAI-compatible endpoint.
1. Download, for example, the `mistral` model with Ollama:
1. Download one of the [available models](https://ollama.com/models), for example `mistral`:
```sh
ollama pull mistral
```
2. Make sure that the Ollama server is running. You can start it either via running the Ollama app, or launching:
2. Make sure that the Ollama server is running. You can start it either by running Ollama.app (macOS) or by launching:
```sh
ollama serve
```
3. In the assistant panel, select one of the Ollama models using the model dropdown.
4. (Optional) If you want to change the default URL that is used to access the Ollama server, you can do so by adding the following settings:
4. (Optional) Specify a [custom api_url](#custom-endpoint) or [custom `low_speed_timeout_in_seconds`](#provider-timeout) if required.
#### Ollama Context Length {#ollama-context}
Zed has pre-configured maximum context lengths (`max_tokens`) to match the capabilities of common models. Zed API requests to Ollama include this as the `num_ctx` parameter, but the default values do not exceed `16384`, so users with ~16 GB of RAM are able to use most models out of the box. See [get_max_tokens in ollama.rs](https://github.com/zed-industries/zed/blob/main/crates/ollama/src/ollama.rs) for a complete set of defaults.
**Note**: Token counts displayed in the assistant panel are only estimates and will differ from the model's native tokenizer.
Depending on your hardware or use case, you may wish to limit or increase the context length for a specific model via settings.json:
```json
{
  "language_models": {
    "ollama": {
      "api_url": "http://localhost:11434",
      "low_speed_timeout_in_seconds": 120,
      "available_models": [
        {
          "provider": "ollama",
          "name": "mistral:latest",
          "max_tokens": 32768
        }
      ]
    }
  }
}
```
If you specify a context length that is too large for your hardware, Ollama will log an error. You can watch these logs by running `tail -f ~/.ollama/logs/ollama.log` (macOS) or `journalctl -u ollama -f` (Linux). Depending on the memory available on your machine, you may need to adjust the context length to a smaller value.
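To make the clamping concrete, here is a standalone worked example; the constants mirror the `get_max_tokens` defaults shown in the diff above, but this is an illustration, not the shipped code.

```rust
// Default and ceiling values mirroring the diff above.
const DEFAULT_TOKENS: usize = 2048;
const MAXIMUM_TOKENS: usize = 16384;

fn default_context_length(model_family_default: usize) -> usize {
    model_family_default.clamp(1, MAXIMUM_TOKENS)
}

fn main() {
    // llama3.1 advertises a 128K context, but the shipped default is capped.
    assert_eq!(default_context_length(128_000), 16_384);
    // mistral's 32K default is capped too; the settings.json example above
    // raises it back to 32768 via `available_models`.
    assert_eq!(default_context_length(32_768), 16_384);
    // Smaller families keep their native defaults.
    assert_eq!(default_context_length(8_192), 8_192);
    assert_eq!(default_context_length(DEFAULT_TOKENS), 2_048);
}
```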
### OpenAI {#openai}
1. Visit the OpenAI platform and [create an API key](https://platform.openai.com/account/api-keys)