Compare commits

..

6 Commits

Author SHA1 Message Date
Joseph T. Lyons
5b61a59e6c zed 0.186.9 2025-05-15 12:43:57 +02:00
gcp-cherry-pick-bot[bot]
98f12f19b2 Fix rejecting overwritten files if the agent previously edited them (cherry-pick #30744) (#30746)
Cherry-picked Fix rejecting overwritten files if the agent previously
edited them (#30744)

Release Notes:

- Fixed rejecting overwritten files if the agent had previously edited
them.

Co-authored-by: Antonio Scandurra <me@as-cii.com>
2025-05-15 11:48:24 +02:00
Joseph T. Lyons
d3bbf52f85 zed 0.186.8 2025-05-14 16:15:56 +02:00
Oleksiy Syvokon
46f6e8b791 agent: Fix tool use in Gemini (#30689)
Thread doesn't run pending tools when `stop_reason` is not `ToolUse`.
Perhaps we should change that so that it always runs pending tools if
there are some, but for now this change just fixes setting `stop_reason`
for Google models.

Release Notes:

- N/A
2025-05-14 15:59:24 +02:00
Oleksiy Syvokon
eaf4f8f654 agent: Don't duplicate recommended models in all models list (#30692)
Release Notes:

- N/A
2025-05-14 15:58:54 +02:00
Oleksiy Syvokon
374842e0e0 agent: Overwrite files more cautiously (#30649)
1. The `edit_file` tool tended to use `create_or_overwrite` a bit too
often, leading to corruption of long files. This change replaces the
boolean flag with an `EditFileMode` enum, which helps Agent make a more
deliberate choice when overwriting files.

With this change, the pass rate of the new eval increased from 10% to
100%.

2. eval: Added ability to run eval on top of an existing thread. Threads
can now be loaded from JSON files in the `SerializedThread` format,
which makes it easy to use real threads as starting points for
tests/evals.

3. Don't try to restore tool cards when running in headless or eval mode
-- we don't have a window to properly do this.

Release Notes:

- N/A
2025-05-14 15:56:14 +02:00
23 changed files with 590 additions and 79 deletions

2
Cargo.lock generated
View File

@@ -18634,7 +18634,7 @@ dependencies = [
[[package]]
name = "zed"
version = "0.186.7"
version = "0.186.9"
dependencies = [
"activity_indicator",
"agent",

View File

@@ -49,7 +49,7 @@ pub use crate::context::{ContextLoadResult, LoadedContext};
pub use crate::inline_assistant::InlineAssistant;
use crate::slash_command_settings::SlashCommandSettings;
pub use crate::thread::{Message, MessageSegment, Thread, ThreadEvent};
pub use crate::thread_store::{TextThreadStore, ThreadStore};
pub use crate::thread_store::{SerializedThread, TextThreadStore, ThreadStore};
pub use agent_diff::{AgentDiffPane, AgentDiffToolbar};
pub use context_store::ContextStore;
pub use ui::preview::{all_agent_previews, get_agent_preview};

View File

@@ -458,7 +458,7 @@ impl Thread {
tools: Entity<ToolWorkingSet>,
prompt_builder: Arc<PromptBuilder>,
project_context: SharedProjectContext,
window: &mut Window,
window: Option<&mut Window>, // None in headless mode
cx: &mut Context<Self>,
) -> Self {
let next_message_id = MessageId(

View File

@@ -386,6 +386,25 @@ impl ThreadStore {
})
}
/// Creates a new [`Thread`] entity whose contents come from `serialized`.
///
/// The thread is assigned a freshly generated [`ThreadId`] and shares this
/// store's project, tools, prompt builder and project context. No window is
/// passed to `Thread::deserialize` (`None`), which is the headless path.
pub fn create_thread_from_serialized(
    &mut self,
    serialized: SerializedThread,
    cx: &mut Context<Self>,
) -> Entity<Thread> {
    // Grab the shared handles up front so the entity constructor only has to
    // move owned values.
    let project = self.project.clone();
    let tools = self.tools.clone();
    let prompt_builder = self.prompt_builder.clone();
    let project_context = self.project_context.clone();

    cx.new(move |thread_cx| {
        Thread::deserialize(
            ThreadId::new(),
            serialized,
            project,
            tools,
            prompt_builder,
            project_context,
            None,
            thread_cx,
        )
    })
}
pub fn open_thread(
&self,
id: &ThreadId,
@@ -411,7 +430,7 @@ impl ThreadStore {
this.tools.clone(),
this.prompt_builder.clone(),
this.project_context.clone(),
window,
Some(window),
cx,
)
})

View File

@@ -52,15 +52,19 @@ impl ToolUseState {
/// Constructs a [`ToolUseState`] from the given list of [`SerializedMessage`]s.
///
/// Accepts a function to filter the tools that should be used to populate the state.
///
/// If `window` is `None` (e.g., when in headless mode or when running evals),
/// tool cards won't be deserialized.
pub fn from_serialized_messages(
tools: Entity<ToolWorkingSet>,
messages: &[SerializedMessage],
project: Entity<Project>,
window: &mut Window,
window: Option<&mut Window>, // None in headless mode
cx: &mut App,
) -> Self {
let mut this = Self::new(tools);
let mut tool_names_by_id = HashMap::default();
let mut window = window;
for message in messages {
match message.role {
@@ -105,12 +109,17 @@ impl ToolUseState {
},
);
if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
if let Some(output) = tool_result.output.clone() {
if let Some(card) =
tool.deserialize_card(output, project.clone(), window, cx)
{
this.tool_result_cards.insert(tool_use_id, card);
if let Some(window) = &mut window {
if let Some(tool) = this.tools.read(cx).tool(tool_use, cx) {
if let Some(output) = tool_result.output.clone() {
if let Some(card) = tool.deserialize_card(
output,
project.clone(),
window,
cx,
) {
this.tool_result_cards.insert(tool_use_id, card);
}
}
}
}

View File

@@ -49,6 +49,37 @@ impl ActionLog {
is_created: bool,
cx: &mut Context<Self>,
) -> &mut TrackedBuffer {
let status = if is_created {
if let Some(tracked) = self.tracked_buffers.remove(&buffer) {
match tracked.status {
TrackedBufferStatus::Created {
existing_file_content,
} => TrackedBufferStatus::Created {
existing_file_content,
},
TrackedBufferStatus::Modified | TrackedBufferStatus::Deleted => {
TrackedBufferStatus::Created {
existing_file_content: Some(tracked.diff_base),
}
}
}
} else if buffer
.read(cx)
.file()
.map_or(false, |file| file.disk_state().exists())
{
TrackedBufferStatus::Created {
existing_file_content: Some(buffer.read(cx).as_rope().clone()),
}
} else {
TrackedBufferStatus::Created {
existing_file_content: None,
}
}
} else {
TrackedBufferStatus::Modified
};
let tracked_buffer = self
.tracked_buffers
.entry(buffer.clone())
@@ -60,36 +91,21 @@ impl ActionLog {
let text_snapshot = buffer.read(cx).text_snapshot();
let diff = cx.new(|cx| BufferDiff::new(&text_snapshot, cx));
let (diff_update_tx, diff_update_rx) = mpsc::unbounded();
let base_text;
let status;
let diff_base;
let unreviewed_changes;
if is_created {
let existing_file_content = if buffer
.read(cx)
.file()
.map_or(false, |file| file.disk_state().exists())
{
Some(text_snapshot.as_rope().clone())
} else {
None
};
base_text = Rope::default();
status = TrackedBufferStatus::Created {
existing_file_content,
};
diff_base = Rope::default();
unreviewed_changes = Patch::new(vec![Edit {
old: 0..1,
new: 0..text_snapshot.max_point().row + 1,
}])
} else {
base_text = buffer.read(cx).as_rope().clone();
status = TrackedBufferStatus::Modified;
diff_base = buffer.read(cx).as_rope().clone();
unreviewed_changes = Patch::default();
}
TrackedBuffer {
buffer: buffer.clone(),
base_text,
diff_base,
unreviewed_changes,
snapshot: text_snapshot.clone(),
status,
@@ -184,7 +200,7 @@ impl ActionLog {
.context("buffer not tracked")?;
let rebase = cx.background_spawn({
let mut base_text = tracked_buffer.base_text.clone();
let mut base_text = tracked_buffer.diff_base.clone();
let old_snapshot = tracked_buffer.snapshot.clone();
let new_snapshot = buffer_snapshot.clone();
let unreviewed_changes = tracked_buffer.unreviewed_changes.clone();
@@ -210,7 +226,7 @@ impl ActionLog {
))
})??;
let (new_base_text, new_base_text_rope) = rebase.await;
let (new_base_text, new_diff_base) = rebase.await;
let diff_snapshot = BufferDiff::update_diff(
diff.clone(),
buffer_snapshot.clone(),
@@ -229,24 +245,23 @@ impl ActionLog {
.background_spawn({
let diff_snapshot = diff_snapshot.clone();
let buffer_snapshot = buffer_snapshot.clone();
let new_base_text_rope = new_base_text_rope.clone();
let new_diff_base = new_diff_base.clone();
async move {
let mut unreviewed_changes = Patch::default();
for hunk in diff_snapshot.hunks_intersecting_range(
Anchor::MIN..Anchor::MAX,
&buffer_snapshot,
) {
let old_range = new_base_text_rope
let old_range = new_diff_base
.offset_to_point(hunk.diff_base_byte_range.start)
..new_base_text_rope
.offset_to_point(hunk.diff_base_byte_range.end);
..new_diff_base.offset_to_point(hunk.diff_base_byte_range.end);
let new_range = hunk.range.start..hunk.range.end;
unreviewed_changes.push(point_to_row_edit(
Edit {
old: old_range,
new: new_range,
},
&new_base_text_rope,
&new_diff_base,
&buffer_snapshot.as_rope(),
));
}
@@ -264,7 +279,7 @@ impl ActionLog {
.tracked_buffers
.get_mut(&buffer)
.context("buffer not tracked")?;
tracked_buffer.base_text = new_base_text_rope;
tracked_buffer.diff_base = new_diff_base;
tracked_buffer.snapshot = buffer_snapshot;
tracked_buffer.unreviewed_changes = unreviewed_changes;
cx.notify();
@@ -283,7 +298,6 @@ impl ActionLog {
/// Mark a buffer as created, so we can refresh it in the context
pub fn buffer_created(&mut self, buffer: Entity<Buffer>, cx: &mut Context<Self>) {
self.edited_since_project_diagnostics_check = true;
self.tracked_buffers.remove(&buffer);
self.track_buffer_internal(buffer.clone(), true, cx);
}
@@ -346,11 +360,11 @@ impl ActionLog {
true
} else {
let old_range = tracked_buffer
.base_text
.diff_base
.point_to_offset(Point::new(edit.old.start, 0))
..tracked_buffer.base_text.point_to_offset(cmp::min(
..tracked_buffer.diff_base.point_to_offset(cmp::min(
Point::new(edit.old.end, 0),
tracked_buffer.base_text.max_point(),
tracked_buffer.diff_base.max_point(),
));
let new_range = tracked_buffer
.snapshot
@@ -359,7 +373,7 @@ impl ActionLog {
Point::new(edit.new.end, 0),
tracked_buffer.snapshot.max_point(),
));
tracked_buffer.base_text.replace(
tracked_buffer.diff_base.replace(
old_range,
&tracked_buffer
.snapshot
@@ -417,7 +431,7 @@ impl ActionLog {
}
TrackedBufferStatus::Deleted => {
buffer.update(cx, |buffer, cx| {
buffer.set_text(tracked_buffer.base_text.to_string(), cx)
buffer.set_text(tracked_buffer.diff_base.to_string(), cx)
});
let save = self
.project
@@ -464,14 +478,14 @@ impl ActionLog {
if revert {
let old_range = tracked_buffer
.base_text
.diff_base
.point_to_offset(Point::new(edit.old.start, 0))
..tracked_buffer.base_text.point_to_offset(cmp::min(
..tracked_buffer.diff_base.point_to_offset(cmp::min(
Point::new(edit.old.end, 0),
tracked_buffer.base_text.max_point(),
tracked_buffer.diff_base.max_point(),
));
let old_text = tracked_buffer
.base_text
.diff_base
.chunks_in_range(old_range)
.collect::<String>();
edits_to_revert.push((new_range, old_text));
@@ -492,7 +506,7 @@ impl ActionLog {
TrackedBufferStatus::Deleted => false,
_ => {
tracked_buffer.unreviewed_changes.clear();
tracked_buffer.base_text = tracked_buffer.snapshot.as_rope().clone();
tracked_buffer.diff_base = tracked_buffer.snapshot.as_rope().clone();
tracked_buffer.schedule_diff_update(ChangeAuthor::User, cx);
true
}
@@ -655,7 +669,7 @@ enum TrackedBufferStatus {
struct TrackedBuffer {
buffer: Entity<Buffer>,
base_text: Rope,
diff_base: Rope,
unreviewed_changes: Patch<u32>,
status: TrackedBufferStatus,
version: clock::Global,
@@ -1094,6 +1108,86 @@ mod tests {
);
}
// Regression test: the agent first edits an existing file and later overwrites
// it from scratch. Rejecting the overwrite must leave no unreviewed hunks and
// must restore the text the file had before the agent's first edit
// ("Lorem ipsum dolor"), not the intermediate, agent-edited text.
#[gpui::test(iterations = 10)]
async fn test_overwriting_previously_edited_files(cx: &mut TestAppContext) {
init_test(cx);
// Fixture: a single file whose original content is "Lorem ipsum dolor".
let fs = FakeFs::new(cx.executor());
fs.insert_tree(
path!("/dir"),
json!({
"file1": "Lorem ipsum dolor"
}),
)
.await;
let project = Project::test(fs.clone(), [path!("/dir").as_ref()], cx).await;
let action_log = cx.new(|_| ActionLog::new(project.clone()));
let file_path = project
.read_with(cx, |project, cx| project.find_project_path("dir/file1", cx))
.unwrap();
let buffer = project
.update(cx, |project, cx| project.open_buffer(file_path, cx))
.await
.unwrap();
// Step 1: a granular edit. The buffer is reported as read, text is appended,
// and the edit is reported to the action log before saving.
cx.update(|cx| {
action_log.update(cx, |log, cx| log.buffer_read(buffer.clone(), cx));
buffer.update(cx, |buffer, cx| buffer.append(" sit amet consecteur", cx));
action_log.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
});
project
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))
.await
.unwrap();
cx.run_until_parked();
// The append shows up as one `Modified` hunk spanning the 37-character line,
// with the original text recorded as the old text.
assert_eq!(
unreviewed_hunks(&action_log, cx),
vec![(
buffer.clone(),
vec![HunkStatus {
range: Point::new(0, 0)..Point::new(0, 37),
diff_status: DiffHunkStatusKind::Modified,
old_text: "Lorem ipsum dolor".into(),
}],
)]
);
// Step 2: overwrite the same buffer. It is reported as created, its whole
// text is replaced, and the edit is reported again before saving.
cx.update(|cx| {
action_log.update(cx, |log, cx| log.buffer_created(buffer.clone(), cx));
buffer.update(cx, |buffer, cx| buffer.set_text("rewritten", cx));
action_log.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
});
project
.update(cx, |project, cx| project.save_buffer(buffer.clone(), cx))
.await
.unwrap();
cx.run_until_parked();
// After the overwrite the whole file is reported as a single `Added` hunk
// with an empty old text.
assert_eq!(
unreviewed_hunks(&action_log, cx),
vec![(
buffer.clone(),
vec![HunkStatus {
range: Point::new(0, 0)..Point::new(0, 9),
diff_status: DiffHunkStatusKind::Added,
old_text: "".into(),
}],
)]
);
// Step 3: reject using a range that lies inside the hunk.
// NOTE(review): presumably any range intersecting the hunk rejects it as a
// whole; confirm against `reject_edits_in_ranges`.
action_log
.update(cx, |log, cx| {
log.reject_edits_in_ranges(buffer.clone(), vec![2..5], cx)
})
.await
.unwrap();
cx.run_until_parked();
// Nothing is left to review, and the buffer is back to the pre-agent content.
assert_eq!(unreviewed_hunks(&action_log, cx), vec![]);
assert_eq!(
buffer.read_with(cx, |buffer, _cx| buffer.text()),
"Lorem ipsum dolor"
);
}
#[gpui::test(iterations = 10)]
async fn test_deleting_files(cx: &mut TestAppContext) {
init_test(cx);
@@ -1601,7 +1695,7 @@ mod tests {
cx.run_until_parked();
action_log.update(cx, |log, cx| {
let tracked_buffer = log.tracked_buffers.get(&buffer).unwrap();
let mut old_text = tracked_buffer.base_text.clone();
let mut old_text = tracked_buffer.diff_base.clone();
let new_text = buffer.read(cx).as_rope();
for edit in tracked_buffer.unreviewed_changes.edits() {
let old_start = old_text.point_to_offset(Point::new(edit.new.start, 0));

View File

@@ -43,7 +43,7 @@ use crate::now_tool::NowTool;
use crate::read_file_tool::ReadFileTool;
use crate::thinking_tool::ThinkingTool;
pub use edit_file_tool::EditFileToolInput;
pub use edit_file_tool::{EditFileMode, EditFileToolInput};
pub use find_path_tool::FindPathToolInput;
pub use open_tool::OpenTool;
pub use read_file_tool::ReadFileToolInput;

View File

@@ -1,5 +1,9 @@
use super::*;
use crate::{ReadFileToolInput, edit_file_tool::EditFileToolInput, grep_tool::GrepToolInput};
use crate::{
ReadFileToolInput,
edit_file_tool::{EditFileMode, EditFileToolInput},
grep_tool::GrepToolInput,
};
use Role::*;
use anyhow::anyhow;
use assistant_tool::ToolRegistry;
@@ -71,7 +75,7 @@ fn eval_extract_handle_command_output() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -127,7 +131,7 @@ fn eval_delete_run_git_blame() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -182,7 +186,7 @@ fn eval_translate_doc_comments() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -297,7 +301,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -372,7 +376,7 @@ fn eval_disable_cursor_blinking() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -566,7 +570,7 @@ fn eval_from_pixels_constructor() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
)],
),
@@ -643,7 +647,7 @@ fn eval_zode() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: true,
mode: EditFileMode::Create,
},
),
],
@@ -888,7 +892,7 @@ fn eval_add_overwrite_test() {
EditFileToolInput {
display_description: edit_description.into(),
path: input_file_path.into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
},
),
],

View File

@@ -75,12 +75,22 @@ pub struct EditFileToolInput {
/// </example>
pub path: PathBuf,
/// If true, this tool will recreate the file from scratch.
/// If false, this tool will produce granular edits to an existing file.
/// The mode of operation on the file. Possible values:
/// - 'edit': Make granular edits to an existing file.
/// - 'create': Create a new file if it doesn't exist.
/// - 'overwrite': Replace the entire contents of an existing file.
///
/// When a file already exists or you just created it, always prefer editing
/// When a file already exists or you just created it, prefer editing
/// it as opposed to recreating it from scratch.
pub create_or_overwrite: bool,
pub mode: EditFileMode,
}
// How the `edit_file` tool should operate on the target file. With
// `rename_all = "lowercase"` the variants serialize as "edit", "create" and
// "overwrite".
//
// NOTE(review): these are deliberately plain `//` comments. The type derives
// `JsonSchema`, and `///` doc comments presumably end up in the generated
// schema; confirm before converting them.
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
#[serde(rename_all = "lowercase")]
pub enum EditFileMode {
// Make granular edits to an existing file.
Edit,
// Create a new file if it doesn't exist.
Create,
// Replace the entire contents of an existing file.
Overwrite,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -194,7 +204,11 @@ impl Tool for EditFileTool {
.as_ref()
.map_or(false, |file| file.disk_state().exists())
})?;
if !input.create_or_overwrite && !exists {
let create_or_overwrite = match input.mode {
EditFileMode::Create | EditFileMode::Overwrite => true,
_ => false,
};
if !create_or_overwrite && !exists {
return Err(anyhow!("{} not found", input.path.display()));
}
@@ -206,7 +220,7 @@ impl Tool for EditFileTool {
})
.await;
let (output, mut events) = if input.create_or_overwrite {
let (output, mut events) = if create_or_overwrite {
edit_agent.overwrite(
buffer.clone(),
input.display_description.clone(),
@@ -872,7 +886,7 @@ mod tests {
let input = serde_json::to_value(EditFileToolInput {
display_description: "Some edit".into(),
path: "root/nonexistent_file.txt".into(),
create_or_overwrite: false,
mode: EditFileMode::Edit,
})
.unwrap();
Arc::new(EditFileTool)

View File

@@ -711,9 +711,9 @@ fn print_report(
.values()
.flat_map(|results| {
results.iter().map(|(example, _)| {
let absolute_path = example.run_directory.join("last.messages.json");
pathdiff::diff_paths(&absolute_path, run_dir)
.unwrap_or_else(|| absolute_path.clone())
let absolute_path = run_dir.join(example.run_directory.join("last.messages.json"));
let cwd = std::env::current_dir().expect("Can't get current dir");
pathdiff::diff_paths(&absolute_path, cwd).unwrap_or_else(|| absolute_path.clone())
})
})
.collect::<Vec<_>>();

View File

@@ -48,6 +48,7 @@ pub struct ExampleMetadata {
pub language_server: Option<LanguageServer>,
pub max_assertions: Option<usize>,
pub profile_id: AgentProfileId,
pub existing_thread_json: Option<String>,
}
#[derive(Clone, Debug)]
@@ -477,12 +478,16 @@ impl Response {
tool_name: &'static str,
cx: &mut ExampleContext,
) -> Result<&ToolUse> {
let result = self.messages.iter().find_map(|msg| {
let result = self.find_tool_call(tool_name);
cx.assert_some(result, format!("called `{}`", tool_name))
}
pub fn find_tool_call(&self, tool_name: &str) -> Option<&ToolUse> {
self.messages.iter().rev().find_map(|msg| {
msg.tool_use
.iter()
.find(|tool_use| tool_use.name == tool_name)
});
cx.assert_some(result, format!("called `{}`", tool_name))
})
}
#[allow(dead_code)]

View File

@@ -21,6 +21,7 @@ impl Example for AddArgToTraitMethod {
}),
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
}
}

View File

@@ -22,6 +22,7 @@ impl Example for CodeBlockCitations {
}),
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
}
}

View File

@@ -1,7 +1,7 @@
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
use anyhow::Result;
use assistant_settings::AgentProfileId;
use assistant_tools::EditFileToolInput;
use assistant_tools::{EditFileMode, EditFileToolInput};
use async_trait::async_trait;
pub struct CommentTranslation;
@@ -16,6 +16,7 @@ impl Example for CommentTranslation {
language_server: None,
max_assertions: Some(1),
profile_id: AgentProfileId::default(),
existing_thread_json: None,
}
}
@@ -35,7 +36,7 @@ impl Example for CommentTranslation {
for tool_use in thread.tool_uses_for_message(message.id, cx) {
if tool_use.name == "edit_file" {
let input: EditFileToolInput = serde_json::from_value(tool_use.input)?;
if input.create_or_overwrite {
if !matches!(input.mode, EditFileMode::Edit) {
create_or_overwrite_count += 1;
}
}

View File

@@ -18,6 +18,7 @@ impl Example for FileSearchExample {
language_server: None,
max_assertions: Some(3),
profile_id: AgentProfileId::default(),
existing_thread_json: None,
}
}

View File

@@ -16,6 +16,7 @@ mod add_arg_to_trait_method;
mod code_block_citations;
mod comment_translation;
mod file_search;
mod overwrite_file;
mod planets;
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
@@ -25,6 +26,7 @@ pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
Rc::new(code_block_citations::CodeBlockCitations),
Rc::new(planets::Planets),
Rc::new(comment_translation::CommentTranslation),
Rc::new(overwrite_file::FileOverwriteExample),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {
@@ -45,6 +47,7 @@ impl DeclarativeExample {
pub fn load(example_path: &Path) -> Result<Self> {
let name = Self::name_from_path(example_path);
let base: ExampleToml = toml::from_str(&fs::read_to_string(&example_path)?)?;
let example_dir = example_path.parent().unwrap();
let language_server = if base.require_lsp {
Some(crate::example::LanguageServer {
@@ -63,6 +66,14 @@ impl DeclarativeExample {
AgentProfileId::default()
};
let existing_thread_json = if let Some(path) = base.existing_thread_path {
let content = fs::read_to_string(example_dir.join(&path))
.unwrap_or_else(|_| panic!("Failed to read existing thread file: {}", path));
Some(content)
} else {
None
};
let metadata = ExampleMetadata {
name,
url: base.url,
@@ -70,6 +81,7 @@ impl DeclarativeExample {
language_server,
max_assertions: None,
profile_id,
existing_thread_json,
};
Ok(DeclarativeExample {
@@ -110,6 +122,8 @@ pub struct ExampleToml {
pub diff_assertions: BTreeMap<String, String>,
#[serde(default)]
pub thread_assertions: BTreeMap<String, String>,
#[serde(default)]
pub existing_thread_path: Option<String>,
}
#[async_trait(?Send)]

View File

@@ -0,0 +1,49 @@
use anyhow::Result;
use assistant_settings::AgentProfileId;
use assistant_tools::{EditFileMode, EditFileToolInput};
use async_trait::async_trait;
use crate::example::{Example, ExampleContext, ExampleMetadata};
/// Eval covering a fix for a destructive behavior of the `edit_file` tool.
///
/// Previously, the tool would rewrite existing files too aggressively, which
/// often resulted in content loss.
///
/// Pass rate before the fix: 10%
/// Pass rate after the fix: 100%
pub struct FileOverwriteExample;

#[async_trait(?Send)]
impl Example for FileOverwriteExample {
    /// Describes the eval: it starts from the serialized thread embedded from
    /// `threads/overwrite-file.json` and allows a single assertion.
    fn meta(&self) -> ExampleMetadata {
        ExampleMetadata {
            name: String::from("file_overwrite"),
            url: String::from("https://github.com/zed-industries/zed.git"),
            revision: String::from("023a60806a8cc82e73bd8d88e63b4b07fc7a0040"),
            language_server: None,
            max_assertions: Some(1),
            profile_id: AgentProfileId::default(),
            existing_thread_json: Some(String::from(include_str!(
                "threads/overwrite-file.json"
            ))),
        }
    }

    /// Runs one turn and asserts that the `edit_file` call found in the
    /// response (if any) used `Edit` mode rather than `Create`/`Overwrite`.
    ///
    /// A response without any `edit_file` call counts as "not overwritten".
    /// A tool input that fails to parse is propagated as an error.
    async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
        let turn = cx.run_turns(1).await?;

        let file_overwritten = match turn.find_tool_call("edit_file") {
            None => false,
            Some(edit_call) => match edit_call.parse_input::<EditFileToolInput>()?.mode {
                EditFileMode::Create | EditFileMode::Overwrite => true,
                EditFileMode::Edit => false,
            },
        };

        cx.assert(!file_overwritten, "File should be edited, not overwritten")
    }
}

View File

@@ -18,6 +18,7 @@ impl Example for Planets {
language_server: None,
max_assertions: None,
profile_id: AgentProfileId::default(),
existing_thread_json: None,
}
}

View File

@@ -0,0 +1,262 @@
{
"completion_mode": "normal",
"cumulative_token_usage": {
"cache_creation_input_tokens": 18383,
"cache_read_input_tokens": 97250,
"input_tokens": 45,
"output_tokens": 776
},
"detailed_summary_state": "NotGenerated",
"exceeded_window_error": null,
"initial_project_snapshot": {
"timestamp": "2025-05-08T14:31:16.701157512Z",
"unsaved_buffer_paths": [],
"worktree_snapshots": [
{
"git_state": {
"current_branch": null,
"diff": "diff --git a/crates/language_model_selector/src/language_model_selector.rs b/crates/language_model_selector/src/language_model_selector.rs\nindex 6775bee98a..e25c9e1415 100644\n--- a/crates/language_model_selector/src/language_model_selector.rs\n+++ b/crates/language_model_selector/src/language_model_selector.rs\n@@ -410,7 +410,8 @@ impl ModelMatcher {\n }\n \n pub fn is_match(self: &Self, info: &ModelInfo) -> bool {\n- self.matched_ids.contains(&info.model.id().0)\n+ let q = (info.model.provider_id(), info.model.id());\n+ self.matched_models.contains(&q)\n }\n }\n \n",
"head_sha": "9245656485e58a5d6d717d82209bc8c57cb9c539",
"remote_url": "git@github.com:zed-industries/zed.git"
},
"worktree_path": "/home/silver/develop/zed"
}
]
},
"messages": [
{
"context": "\n<context>\nThe following items were attached by the user. They are up-to-date and don't need to be re-read.\n\n<files>\n```rs zed/crates/language_model_selector/src/language_model_selector.rs\nconst TRY_ZED_PRO_URL [L28]\ntype OnModelChanged [L30]\ntype GetActiveModel [L31]\npub struct LanguageModelSelector [L33-37]\n picker [L34]\n _authenticate_all_providers_task [L35]\n _subscriptions [L36]\nimpl LanguageModelSelector [L39-231]\n pub fn new [L40-81]\n fn handle_language_model_registry_event [L83-104]\n fn authenticate_all_providers [L110-154]\n fn all_models [L156-204]\n pub fn active_model [L206-208]\n fn get_active_model_index [L210-230]\nimpl EventEmitter<DismissEvent> for LanguageModelSelector [L233]\nimpl Focusable for LanguageModelSelector [L235-239]\n fn focus_handle [L236-238]\nimpl Render for LanguageModelSelector [L241-245]\n fn render [L242-244]\npub struct LanguageModelSelectorPopoverMenu [L248-258]\n language_model_selector [L253]\n trigger [L254]\n tooltip [L255]\n handle [L256]\n anchor [L257]\nimpl LanguageModelSelectorPopoverMenu<T, TT> [L260-284]\n pub fn new [L265-278]\n pub fn with_handle [L280-283]\nimpl RenderOnce for LanguageModelSelectorPopoverMenu<T, TT> [L286-304]\n fn render [L291-303]\nstruct ModelInfo [L307-310]\n model [L308]\n icon [L309]\npub struct LanguageModelPickerDelegate [L312-319]\n language_model_selector [L313]\n on_model_changed [L314]\n get_active_model [L315]\n all_models [L316]\n filtered_entries [L317]\n selected_index [L318]\nstruct GroupedModels [L321-324]\n recommended [L322]\n other [L323]\nimpl GroupedModels [L326-385]\n pub fn new [L327-342]\n fn entries [L344-370]\n fn model_infos [L372-384]\nenum LanguageModelPickerEntry [L387-390]\n Model [L388]\n Separator [L389]\nstruct ModelMatcher [L392-396]\n models [L393]\n bg_executor [L394]\n candidates [L395]\nimpl ModelMatcher [L398-452]\n fn new [L399-406]\n pub fn fuzzy_search [L408-424]\n pub fn exact_search [L426-438]\n fn make_match_candidates 
[L440-451]\nimpl PickerDelegate for LanguageModelPickerDelegate [L454-693]\n type ListItem [L455]\n fn match_count [L457-459]\n fn selected_index [L461-463]\n fn set_selected_index [L465-468]\n fn can_select [L470-480]\n fn placeholder_text [L482-484]\n fn update_matches [L486-546]\n fn confirm [L548-560]\n fn dismissed [L562-566]\n fn render_match [L568-636]\n fn render_footer [L638-692]\n\nShowing symbols 1-71 (total symbols: 71)\n\n```\n</files>\n</context>\n",
"creases": [
{
"end": 140,
"icon_path": "icons/file_icons/rust.svg",
"label": "language_model_selector.rs",
"start": 38
}
],
"id": 0,
"role": "user",
"segments": [
{
"text": "I recently introduced fuzzy search in [@language_model_selector.rs](@file:zed/crates/language_model_selector/src/language_model_selector.rs). Can you write unit-test that check it? You can find a test list of models on images. ",
"type": "text"
}
],
"tool_results": [],
"tool_uses": []
},
{
"context": "",
"creases": [],
"id": 1,
"role": "assistant",
"segments": [
{
"text": "I'll help you write unit tests for the fuzzy search functionality in the language model selector. I see that you want to test the fuzzy search with the models shown in the images - both Claude models (3, 3.5, 3.7) and GPT models (GPT-3.5, GPT-4, GPT-4o, etc.).\n\nLet's start by examining the code structure to understand how the fuzzy search works.",
"type": "text"
}
],
"tool_results": [
{
"content": "zed/crates/language_model_selector/Cargo.toml\nzed/crates/language_model_selector/LICENSE-GPL\nzed/crates/language_model_selector/src\n",
"is_error": false,
"output": null,
"tool_use_id": "toolu_01FvYMtHs763GL1wA5mpTpqn"
}
],
"tool_uses": [
{
"id": "toolu_01FvYMtHs763GL1wA5mpTpqn",
"input": {
"path": "zed/crates/language_model_selector"
},
"name": "list_directory"
}
]
},
{
"context": "",
"creases": [],
"id": 2,
"role": "assistant",
"segments": [
{
"text": "Let's look at the structure of the tests directory or see if there are existing tests:",
"type": "text"
}
],
"tool_results": [
{
"content": "zed/crates/language_model_selector/src/language_model_selector.rs\n",
"is_error": false,
"output": null,
"tool_use_id": "toolu_01KSLDsLLrf7tgixB1sge3cm"
}
],
"tool_uses": [
{
"id": "toolu_01KSLDsLLrf7tgixB1sge3cm",
"input": {
"path": "zed/crates/language_model_selector/src"
},
"name": "list_directory"
}
]
},
{
"context": "",
"creases": [],
"id": 3,
"role": "assistant",
"segments": [
{
"text": "Let's check the Cargo.toml to understand the dependencies:",
"type": "text"
}
],
"tool_results": [
{
"content": "[package]\nname = \"language_model_selector\"\nversion = \"0.1.0\"\nedition.workspace = true\npublish.workspace = true\nlicense = \"GPL-3.0-or-later\"\n\n[lints]\nworkspace = true\n\n[lib]\npath = \"src/language_model_selector.rs\"\n\n[dependencies]\ncollections.workspace = true\nfeature_flags.workspace = true\nfuzzy.workspace = true\ngpui.workspace = true\nlanguage_model.workspace = true\nlog.workspace = true\npicker.workspace = true\nproto.workspace = true\nui.workspace = true\nworkspace-hack.workspace = true\nzed_actions.workspace = true\n",
"is_error": false,
"output": null,
"tool_use_id": "toolu_019Je2MLfJhpJr93g5igoRAH"
}
],
"tool_uses": [
{
"id": "toolu_019Je2MLfJhpJr93g5igoRAH",
"input": {
"path": "zed/crates/language_model_selector/Cargo.toml"
},
"name": "read_file"
}
]
},
{
"context": "",
"creases": [],
"id": 4,
"role": "assistant",
"segments": [
{
"text": "Now let's look at the fuzzy search implementation in detail:",
"type": "text"
}
],
"tool_results": [
{
"content": "struct ModelMatcher {\n models: Vec<ModelInfo>,\n bg_executor: BackgroundExecutor,\n candidates: Vec<StringMatchCandidate>,\n}\n\nimpl ModelMatcher {\n fn new(models: Vec<ModelInfo>, bg_executor: BackgroundExecutor) -> ModelMatcher {\n let candidates = Self::make_match_candidates(&models);\n Self {\n models,\n bg_executor,\n candidates,\n }\n }\n\n pub fn fuzzy_search(&self, query: &str) -> Vec<ModelInfo> {\n let matches = self.bg_executor.block(match_strings(\n &self.candidates,\n &query,\n false,\n 100,\n &Default::default(),\n self.bg_executor.clone(),\n ));\n\n let matched_models: Vec<_> = matches\n .into_iter()\n .map(|m| self.models[m.candidate_id].clone())\n .collect();\n\n matched_models\n }\n\n pub fn exact_search(&self, query: &str) -> Vec<ModelInfo> {\n self.models\n .iter()\n .filter(|m| {\n m.model\n .name()\n .0\n .to_lowercase()\n .contains(&query.to_lowercase())\n })\n .cloned()\n .collect::<Vec<_>>()\n }\n\n fn make_match_candidates(model_infos: &Vec<ModelInfo>) -> Vec<StringMatchCandidate> {\n model_infos\n .iter()\n .enumerate()\n .map(|(index, model)| {\n StringMatchCandidate::new(\n index,\n &format!(\"{}/{}\", &model.model.provider_id().0, &model.model.name().0),\n )\n })\n .collect::<Vec<_>>()\n }\n}",
"is_error": false,
"output": null,
"tool_use_id": "toolu_01GpKFEn6hYrXGiS5yBF3urk"
}
],
"tool_uses": [
{
"id": "toolu_01GpKFEn6hYrXGiS5yBF3urk",
"input": {
"end_line": 452,
"path": "zed/crates/language_model_selector/src/language_model_selector.rs",
"start_line": 392
},
"name": "read_file"
}
]
},
{
"context": "",
"creases": [],
"id": 5,
"role": "assistant",
"segments": [
{
"text": "Let's now check if there's an existing test module:",
"type": "text"
}
],
"tool_results": [
{
"content": "No matches found",
"is_error": false,
"output": null,
"tool_use_id": "toolu_01RZAvFvpMq16Dwh6d6cnVzy"
}
],
"tool_uses": [
{
"id": "toolu_01RZAvFvpMq16Dwh6d6cnVzy",
"input": {
"include_pattern": "zed/crates/language_model_selector/**/*.rs",
"regex": "mod tests"
},
"name": "grep"
}
]
},
{
"context": "",
"creases": [],
"id": 6,
"role": "assistant",
"segments": [
{
"text": "Now I'll create a test module for the ModelMatcher's fuzzy search functionality. Let's create this test module:",
"type": "text"
}
],
"tool_results": [],
"tool_uses": []
}
],
"model": {
"model": "claude-3-7-sonnet-latest",
"provider": "zed.dev"
},
"profile": "Write",
"request_token_usage": [
{
"cache_creation_input_tokens": 9581,
"input_tokens": 4,
"output_tokens": 5
},
{
"cache_creation_input_tokens": 204,
"cache_read_input_tokens": 9581,
"input_tokens": 6,
"output_tokens": 5
},
{
"cache_creation_input_tokens": 104,
"cache_read_input_tokens": 9785,
"input_tokens": 6,
"output_tokens": 5
},
{
"cache_creation_input_tokens": 239,
"cache_read_input_tokens": 9889,
"input_tokens": 6,
"output_tokens": 5
},
{
"cache_creation_input_tokens": 569,
"cache_read_input_tokens": 10128,
"input_tokens": 5,
"output_tokens": 5
},
{
"cache_creation_input_tokens": 87,
"cache_read_input_tokens": 10697,
"input_tokens": 5,
"output_tokens": 2
},
{
"cache_creation_input_tokens": 7355,
"cache_read_input_tokens": 10784,
"input_tokens": 5,
"output_tokens": 3
}
],
"summary": "Fuzzy Search Testing Language Model Selector",
"updated_at": "2025-05-08T18:20:34.205405751Z",
"version": "0.2.0"
}

View File

@@ -1,4 +1,4 @@
use agent::{Message, MessageSegment, ThreadStore};
use agent::{Message, MessageSegment, SerializedThread, ThreadStore};
use anyhow::{Context, Result, anyhow, bail};
use assistant_tool::ToolWorkingSet;
use client::proto::LspWorkProgress;
@@ -312,7 +312,14 @@ impl ExampleInstance {
thread_store.update(cx, |thread_store, cx| thread_store.load_profile_by_id(profile_id, cx)).expect("Failed to load profile");
let thread =
thread_store.update(cx, |thread_store, cx| thread_store.create_thread(cx))?;
thread_store.update(cx, |thread_store, cx| {
if let Some(json) = &meta.existing_thread_json {
let serialized = SerializedThread::from_json(json.as_bytes()).expect("Can't read serialized thread");
thread_store.create_thread_from_serialized(serialized, cx)
} else {
thread_store.create_thread(cx)
}
})?;
thread.update(cx, |thread, _cx| {

View File

@@ -326,8 +326,14 @@ struct GroupedModels {
impl GroupedModels {
pub fn new(other: Vec<ModelInfo>, recommended: Vec<ModelInfo>) -> Self {
let recommended_ids: HashSet<_> = recommended.iter().map(|info| info.model.id()).collect();
let mut other_by_provider: IndexMap<_, Vec<ModelInfo>> = IndexMap::default();
for model in other {
if recommended_ids.contains(&model.model.id()) {
continue;
}
let provider = model.model.provider_id();
if let Some(models) = other_by_provider.get_mut(&provider) {
models.push(model);
@@ -885,4 +891,26 @@ mod tests {
let results = matcher.fuzzy_search("z4n");
assert_models_eq(results, vec!["zed/gpt-4.1-nano"]);
}
#[gpui::test]
fn test_exclude_recommended_models(_cx: &mut TestAppContext) {
    // "zed/claude" is supplied both as a recommended model and as part of the
    // full model list; after grouping it must not show up again under "other".
    let recommended = create_models(vec![("zed", "claude")]);
    let everything = create_models(vec![
        ("zed", "claude"),
        ("zed", "gemini"),
        ("copilot", "o3"),
    ]);

    let grouped = GroupedModels::new(everything, recommended);

    // Walk the per-provider buckets in order and gather every remaining model.
    let mut leftover = Vec::new();
    for provider_models in grouped.other.values() {
        leftover.extend(provider_models.iter().cloned());
    }

    // Only the non-recommended models are expected, in their original order.
    assert_models_eq(leftover, vec!["zed/gemini", "copilot/o3"]);
}
}

View File

@@ -599,6 +599,7 @@ impl GoogleEventMapper {
// responds with `finish_reason: STOP`
if wants_to_use_tool {
self.stop_reason = StopReason::ToolUse;
events.push(Ok(LanguageModelCompletionEvent::Stop(StopReason::ToolUse)));
}
events
}

View File

@@ -2,7 +2,7 @@
description = "The fast, collaborative code editor."
edition.workspace = true
name = "zed"
version = "0.186.7"
version = "0.186.9"
publish.workspace = true
license = "GPL-3.0-or-later"
authors = ["Zed Team <hi@zed.dev>"]