Compare commits

`fix-git-ht` ... `v0.151.1` (6 commits)

| Author | SHA1 | Date |
| --- | --- | --- |
|  | 0c74f4e0ca |  |
|  | 7c0ad81a3d |  |
|  | cd0841c590 |  |
|  | f44f5c5bc7 |  |
|  | 670b3b9382 |  |
|  | 75da9af3e6 |  |
Cargo.lock (generated, 582 changed lines): file diff suppressed because it is too large.
```diff
@@ -135,6 +135,7 @@ impl AssistantSettingsContent {
                         Some(language_model::settings::OllamaSettingsContent {
                             api_url,
                             low_speed_timeout_in_seconds,
+                            available_models: None,
                         });
                     }
                 },
@@ -295,7 +296,7 @@ impl AssistantSettingsContent {
                 _ => (None, None),
             };
             settings.provider = Some(AssistantProviderContentV1::Ollama {
-                default_model: Some(ollama::Model::new(&model)),
+                default_model: Some(ollama::Model::new(&model, None, None)),
                 api_url,
                 low_speed_timeout_in_seconds,
             });
```
```diff
@@ -107,8 +107,10 @@ impl Match {

         if let Some(path_match) = &self.path_match {
             text.push_str(&path_match.path.to_string_lossy());
+            let mut whole_path = PathBuf::from(path_match.path_prefix.to_string());
+            whole_path = whole_path.join(path_match.path.clone());
             for (range, style) in highlight_ranges(
-                &path_match.path.to_string_lossy(),
+                &whole_path.to_string_lossy(),
                 &path_match.positions,
                 gpui::HighlightStyle::color(Color::Accent.color(cx)),
             ) {
```
```diff
@@ -6,8 +6,10 @@ use ollama::{
    get_models, preload_model, stream_chat_completion, ChatMessage, ChatOptions, ChatRequest,
    ChatResponseDelta, OllamaToolCall,
 };
+use schemars::JsonSchema;
+use serde::{Deserialize, Serialize};
 use settings::{Settings, SettingsStore};
-use std::{sync::Arc, time::Duration};
+use std::{collections::BTreeMap, sync::Arc, time::Duration};
 use ui::{prelude::*, ButtonLike, Indicator};
 use util::ResultExt;

@@ -28,6 +30,17 @@ const PROVIDER_NAME: &str = "Ollama";
 pub struct OllamaSettings {
     pub api_url: String,
     pub low_speed_timeout: Option<Duration>,
+    pub available_models: Vec<AvailableModel>,
 }

+#[derive(Clone, Debug, PartialEq, Serialize, Deserialize, JsonSchema)]
+pub struct AvailableModel {
+    /// The model name in the Ollama API (e.g. "llama3.1:latest")
+    pub name: String,
+    /// The model's name in Zed's UI, such as in the model selector dropdown menu in the assistant panel.
+    pub display_name: Option<String>,
+    /// The Context Length parameter to the model (aka num_ctx or n_ctx)
+    pub max_tokens: usize,
+}
+
 pub struct OllamaLanguageModelProvider {
@@ -61,7 +74,7 @@ impl State {
                     // indicating which models are embedding models,
                     // simply filter out models with "-embed" in their name
                     .filter(|model| !model.name.contains("-embed"))
-                    .map(|model| ollama::Model::new(&model.name))
+                    .map(|model| ollama::Model::new(&model.name, None, None))
                     .collect();

                 models.sort_by(|a, b| a.name.cmp(&b.name));
@@ -123,10 +136,32 @@ impl LanguageModelProvider for OllamaLanguageModelProvider {
     }

     fn provided_models(&self, cx: &AppContext) -> Vec<Arc<dyn LanguageModel>> {
-        self.state
-            .read(cx)
+        let mut models: BTreeMap<String, ollama::Model> = BTreeMap::default();
+
+        // Add models from the Ollama API
+        for model in self.state.read(cx).available_models.iter() {
+            models.insert(model.name.clone(), model.clone());
+        }
+
+        // Override with available models from settings
+        for model in AllLanguageModelSettings::get_global(cx)
+            .ollama
            .available_models
            .iter()
+        {
+            models.insert(
+                model.name.clone(),
+                ollama::Model {
+                    name: model.name.clone(),
+                    display_name: model.display_name.clone(),
+                    max_tokens: model.max_tokens,
+                    keep_alive: None,
+                },
+            );
+        }
+
+        models
+            .into_values()
             .map(|model| {
                 Arc::new(OllamaLanguageModel {
                     id: LanguageModelId::from(model.name.clone()),
```
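The `AvailableModel` struct above is what user settings deserialize into, and the rewritten `provided_models` gives settings entries priority over models reported by the Ollama API, keyed by model name. A minimal sketch of what such an entry could look like in `settings.json`, assuming the `language_models.ollama.available_models` nesting shown in the documentation hunk at the end of this compare; the model name, display name, and token count are illustrative values only:

```json
{
  "language_models": {
    "ollama": {
      "available_models": [
        {
          "name": "llama3.1:latest",
          "display_name": "Llama 3.1 8B",
          "max_tokens": 32768
        }
      ]
    }
  }
}
```

Because the map is keyed by `name`, an entry like this replaces whatever defaults the provider would otherwise pick for the same model.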
```diff
@@ -152,6 +152,7 @@ pub struct AnthropicSettingsContentV1 {
 pub struct OllamaSettingsContent {
     pub api_url: Option<String>,
     pub low_speed_timeout_in_seconds: Option<u64>,
+    pub available_models: Option<Vec<provider::ollama::AvailableModel>>,
 }

 #[derive(Clone, Debug, Serialize, Deserialize, PartialEq, JsonSchema)]
@@ -276,6 +277,9 @@ impl settings::Settings for AllLanguageModelSettings {
             anthropic.as_ref().and_then(|s| s.available_models.clone()),
         );

+        // Ollama
+        let ollama = value.ollama.clone();
+
         merge(
             &mut settings.ollama.api_url,
             value.ollama.as_ref().and_then(|s| s.api_url.clone()),
@@ -288,6 +292,10 @@ impl settings::Settings for AllLanguageModelSettings {
             settings.ollama.low_speed_timeout =
                 Some(Duration::from_secs(low_speed_timeout_in_seconds));
         }
+        merge(
+            &mut settings.ollama.available_models,
+            ollama.as_ref().and_then(|s| s.available_models.clone()),
+        );

         // OpenAI
         let (openai, upgraded) = match value.openai.clone().map(|s| s.upgrade()) {
```
```diff
@@ -66,40 +66,37 @@ impl Default for KeepAlive {
 #[derive(Clone, Debug, Default, Serialize, Deserialize, PartialEq)]
 pub struct Model {
     pub name: String,
+    pub display_name: Option<String>,
     pub max_tokens: usize,
     pub keep_alive: Option<KeepAlive>,
 }

-// This could be dynamically retrieved via the API (1 call per model)
-// curl -s http://localhost:11434/api/show -d '{"model": "llama3.1:latest"}' | jq '.model_info."llama.context_length"'
 fn get_max_tokens(name: &str) -> usize {
-    match name {
-        "dolphin-llama3:8b-256k" => 262144, // 256K
-        _ => match name.split(':').next().unwrap() {
-            "mistral-nemo" => 1024000, // 1M
-            "deepseek-coder-v2" => 163840, // 160K
-            "llama3.1" | "phi3" | "command-r" | "command-r-plus" => 131072, // 128K
-            "codeqwen" => 65536, // 64K
-            "mistral" | "mistral-large" | "dolphin-mistral" | "codestral" // 32K
-            | "mistral-openorca" | "dolphin-mixtral" | "mixstral" | "llava"
-            | "qwen" | "qwen2" | "wizardlm2" | "wizard-math" => 32768,
-            "codellama" | "stable-code" | "deepseek-coder" | "starcoder2" // 16K
-            | "wizardcoder" => 16384,
-            "llama3" | "gemma2" | "gemma" | "codegemma" | "dolphin-llama3" // 8K
-            | "llava-llama3" | "starcoder" | "openchat" | "aya" => 8192,
-            "llama2" | "yi" | "llama2-chinese" | "vicuna" | "nous-hermes2" // 4K
-            | "stablelm2" => 4096,
-            "phi" | "orca-mini" | "tinyllama" | "granite-code" => 2048, // 2K
-            _ => 2048, // 2K (default)
-        },
+    /// Default context length for unknown models.
+    const DEFAULT_TOKENS: usize = 2048;
+    /// Magic number. Lets many Ollama models work with ~16GB of ram.
+    const MAXIMUM_TOKENS: usize = 16384;
+
+    match name.split(':').next().unwrap() {
+        "phi" | "tinyllama" | "granite-code" => 2048,
+        "llama2" | "yi" | "vicuna" | "stablelm2" => 4096,
+        "llama3" | "gemma2" | "gemma" | "codegemma" | "starcoder" | "aya" => 8192,
+        "codellama" | "starcoder2" => 16384,
+        "mistral" | "codestral" | "mixstral" | "llava" | "qwen2" | "dolphin-mixtral" => 32768,
+        "llama3.1" | "phi3" | "phi3.5" | "command-r" | "deepseek-coder-v2" => 128000,
+        _ => DEFAULT_TOKENS,
     }
+    .clamp(1, MAXIMUM_TOKENS)
 }

 impl Model {
-    pub fn new(name: &str) -> Self {
+    pub fn new(name: &str, display_name: Option<&str>, max_tokens: Option<usize>) -> Self {
         Self {
             name: name.to_owned(),
-            max_tokens: get_max_tokens(name),
+            display_name: display_name
+                .map(ToString::to_string)
+                .or_else(|| name.strip_suffix(":latest").map(ToString::to_string)),
+            max_tokens: max_tokens.unwrap_or_else(|| get_max_tokens(name)),
             keep_alive: Some(KeepAlive::indefinite()),
         }
     }
@@ -109,7 +106,7 @@ impl Model {
     }

     pub fn display_name(&self) -> &str {
-        &self.name
+        self.display_name.as_ref().unwrap_or(&self.name)
     }

     pub fn max_token_count(&self) -> usize {
```
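To make the behavior change concrete, here is a small sketch of how the new three-argument `Model::new` resolves its fields, assuming the `ollama::Model` and `get_max_tokens` definitions from the hunk above are in scope; the model names are illustrative:

```rust
use ollama::Model;

fn main() {
    // No overrides: the ":latest" suffix is stripped for the display name and
    // max_tokens comes from the built-in table, clamped to MAXIMUM_TOKENS.
    let detected = Model::new("llama3.1:latest", None, None);
    assert_eq!(detected.display_name(), "llama3.1");
    assert_eq!(detected.max_token_count(), 16384); // 128000 clamped to 16384

    // Explicit overrides from settings bypass the table entirely.
    let custom = Model::new("qwen2:7b-instruct", Some("Qwen2 7B"), Some(32768));
    assert_eq!(custom.display_name(), "Qwen2 7B");
    assert_eq!(custom.max_token_count(), 32768);
}
```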
```diff
@@ -30,10 +30,15 @@ search.workspace = true
 serde.workspace = true
 serde_json.workspace = true
 settings.workspace = true
 smol.workspace = true
 theme.workspace = true
 util.workspace = true
+worktree.workspace = true
 workspace.workspace = true
+
+[dev-dependencies]
+search = { workspace = true, features = ["test-support"] }
+pretty_assertions.workspace = true

 [package.metadata.cargo-machete]
 ignored = ["log"]
```
File diff suppressed because it is too large.
```diff
@@ -5,6 +5,14 @@ edition = "2021"
 publish = false
 license = "GPL-3.0-or-later"

+[features]
+test-support = [
+    "client/test-support",
+    "editor/test-support",
+    "gpui/test-support",
+    "workspace/test-support",
+]
+
 [lints]
 workspace = true
```
```diff
@@ -113,7 +113,7 @@ pub fn init(cx: &mut AppContext) {
         .detach();
 }

-struct ProjectSearch {
+pub struct ProjectSearch {
     project: Model<Project>,
     excerpts: Model<MultiBuffer>,
     pending_search: Option<Task<Option<()>>>,
@@ -151,7 +151,7 @@ pub struct ProjectSearchView {
 }

 #[derive(Debug, Clone)]
-struct ProjectSearchSettings {
+pub struct ProjectSearchSettings {
     search_options: SearchOptions,
     filters_enabled: bool,
 }
@@ -162,7 +162,7 @@ pub struct ProjectSearchBar {
 }

 impl ProjectSearch {
-    fn new(project: Model<Project>, cx: &mut ModelContext<Self>) -> Self {
+    pub fn new(project: Model<Project>, cx: &mut ModelContext<Self>) -> Self {
         let replica_id = project.read(cx).replica_id();
         let capability = project.read(cx).capability();

@@ -605,7 +605,7 @@ impl ProjectSearchView {
         });
     }

-    fn new(
+    pub fn new(
         model: Model<ProjectSearch>,
         cx: &mut ViewContext<Self>,
         settings: Option<ProjectSearchSettings>,
@@ -751,9 +751,9 @@ impl ProjectSearchView {
         });
     }

-    // Re-activate the most recently activated search in this pane or the most recent if it has been closed.
-    // If no search exists in the workspace, create a new one.
-    fn deploy_search(
+    /// Re-activate the most recently activated search in this pane or the most recent if it has been closed.
+    /// If no search exists in the workspace, create a new one.
+    pub fn deploy_search(
         workspace: &mut Workspace,
         action: &workspace::DeploySearch,
         cx: &mut ViewContext<Workspace>,
@@ -1140,6 +1140,11 @@ impl ProjectSearchView {
             return self.focus_results_editor(cx);
         }
     }
+
+    #[cfg(any(test, feature = "test-support"))]
+    pub fn results_editor(&self) -> &View<Editor> {
+        &self.results_editor
+    }
 }

 impl ProjectSearchBar {
@@ -1752,15 +1757,31 @@ fn register_workspace_action_for_present_search<A: Action>(
     });
 }

+#[cfg(any(test, feature = "test-support"))]
+pub fn perform_project_search(
+    search_view: &View<ProjectSearchView>,
+    text: impl Into<std::sync::Arc<str>>,
+    cx: &mut gpui::VisualTestContext,
+) {
+    search_view.update(cx, |search_view, cx| {
+        search_view
+            .query_editor
+            .update(cx, |query_editor, cx| query_editor.set_text(text, cx));
+        search_view.search(cx);
+    });
+    cx.run_until_parked();
+}
+
 #[cfg(test)]
 pub mod tests {
-    use std::sync::Arc;

     use super::*;
     use editor::{display_map::DisplayRow, DisplayPoint};
     use gpui::{Action, TestAppContext, WindowHandle};
     use project::FakeFs;
     use serde_json::json;
     use settings::SettingsStore;
+    use std::sync::Arc;
+    use workspace::DeploySearch;

     #[gpui::test]
```
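For context, a sketch of how a downstream crate's test could use the new test-support surface (`perform_project_search` plus the `results_editor` accessor), assuming the `search` crate is pulled in with its new `test-support` feature and that `search_view` and `cx` come from the usual project-search test setup; the query string and module path are assumptions, not taken from the diff:

```rust
use gpui::{View, VisualTestContext};
use search::project_search::{perform_project_search, ProjectSearchView};

// Hypothetical helper for a downstream test.
fn assert_query_has_results(search_view: &View<ProjectSearchView>, cx: &mut VisualTestContext) {
    // Types the query, kicks off the search, and waits for the background task.
    perform_project_search(search_view, "needle", cx);
    // The results editor is now reachable without touching private fields.
    search_view.update(cx, |search_view, cx| {
        let results = search_view.results_editor().read(cx).text(cx);
        assert!(results.contains("needle"));
    });
}
```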
```diff
@@ -117,7 +117,7 @@ pub struct RevealInProjectPanel {
     pub entry_id: Option<u64>,
 }

-#[derive(PartialEq, Clone, Deserialize)]
+#[derive(Default, PartialEq, Clone, Deserialize)]
 pub struct DeploySearch {
     #[serde(default)]
     pub replace_enabled: bool,
```
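The added `Default` derive means the action no longer has to be spelled out field by field; together with `#[serde(default)]`, an empty keymap entry and `DeploySearch::default()` describe the same action. A minimal sketch (hypothetical helper, not from the diff):

```rust
// Construct the action programmatically, e.g. from a test.
fn deploy_plain_search() -> workspace::DeploySearch {
    let action = workspace::DeploySearch::default();
    debug_assert!(!action.replace_enabled); // bool::default() is false
    action
}
```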
```diff
@@ -2,7 +2,7 @@
 description = "The fast, collaborative code editor."
 edition = "2021"
 name = "zed"
-version = "0.151.0"
+version = "0.151.1"
 publish = false
 license = "GPL-3.0-or-later"
 authors = ["Zed Team <hi@zed.dev>"]
```
```diff
@@ -1 +1 @@
-dev
+stable
```
````diff
@@ -108,33 +108,49 @@ Custom models will be listed in the model dropdown in the assistant panel.

 Download and install Ollama from [ollama.com/download](https://ollama.com/download) (Linux or macOS) and ensure it's running with `ollama --version`.

-You can use Ollama with the Zed assistant by making Ollama appear as an OpenAPI endpoint.
-
-1. Download, for example, the `mistral` model with Ollama:
+1. Download one of the [available models](https://ollama.com/models), for example, for `mistral`:

    ```sh
    ollama pull mistral
    ```

-2. Make sure that the Ollama server is running. You can start it either via running the Ollama app, or launching:
+2. Make sure that the Ollama server is running. You can start it either via running Ollama.app (MacOS) or launching:

    ```sh
    ollama serve
    ```

 3. In the assistant panel, select one of the Ollama models using the model dropdown.
-4. (Optional) If you want to change the default URL that is used to access the Ollama server, you can do so by adding the following settings:

+4. (Optional) Specify a [custom api_url](#custom-endpoint) or [custom `low_speed_timeout_in_seconds`](#provider-timeout) if required.
+
+#### Ollama Context Length {#ollama-context}
+
+Zed has pre-configured maximum context lengths (`max_tokens`) to match the capabilities of common models. Zed API requests to Ollama include this as the `num_ctx` parameter, but the default values do not exceed `16384`, so users with ~16GB of RAM are able to use most models out of the box. See [get_max_tokens in ollama.rs](https://github.com/zed-industries/zed/blob/main/crates/ollama/src/ollama.rs) for a complete set of defaults.
+
+**Note**: Token counts displayed in the assistant panel are only estimates and will differ from the model's native tokenizer.
+
+Depending on your hardware or use-case you may wish to limit or increase the context length for a specific model via settings.json:
+
 ```json
 {
   "language_models": {
     "ollama": {
-      "api_url": "http://localhost:11434"
+      "low_speed_timeout_in_seconds": 120,
+      "available_models": [
+        {
+          "provider": "ollama",
+          "name": "mistral:latest",
+          "max_tokens": 32768
+        }
+      ]
     }
   }
 }
 ```
+
+If you specify a context length that is too large for your hardware, Ollama will log an error. You can watch these logs by running: `tail -f ~/.ollama/logs/ollama.log` (MacOS) or `journalctl -u ollama -f` (Linux). Depending on the memory available on your machine, you may need to adjust the context length to a smaller value.

 ### OpenAI {#openai}

 1. Visit the OpenAI platform and [create an API key](https://platform.openai.com/account/api-keys)
````
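As a practical footnote to the context-length documentation above: the comment removed from `ollama.rs` in this compare shows how to query a model's native context length straight from a running Ollama server, which is useful when picking a `max_tokens` override (the model name here is just an example):

```sh
# Ask Ollama for a model's native context window (same command as the ollama.rs comment).
curl -s http://localhost:11434/api/show -d '{"model": "llama3.1:latest"}' \
  | jq '.model_info."llama.context_length"'
```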