Compare commits

...

13 Commits

Author SHA1 Message Date
Antonio Scandurra 3f7f22e50b Checkpoint 2025-04-27 20:10:20 +02:00
Antonio Scandurra 938a9ad405 Refine description for edit_file_tool 2025-04-27 19:21:16 +02:00
Antonio Scandurra 190bdddfc1 Update description.md 2025-04-27 12:05:00 +02:00
Antonio Scandurra 7ad3816ea9 WIP 2025-04-27 11:52:20 +02:00
Antonio Scandurra 530d2a0ccd Rename eval 2025-04-27 10:46:23 +02:00
Antonio Scandurra 0cd0ee6fb9 Checkpoint: first eval passing! 2025-04-27 10:29:15 +02:00
Antonio Scandurra 936972d9b0 Run evals in parallel 2025-04-26 13:35:54 +02:00
Antonio Scandurra e9533423db Checkpoint 2025-04-26 12:49:28 +02:00
Antonio Scandurra ba480295c1 WIP: working test_remove_function in a loop 2025-04-25 21:39:55 +02:00
Antonio Scandurra 9106f4495b Introduce a new EditParser struct 2025-04-25 21:18:01 +02:00
Antonio Scandurra 1feb1296fe WIP 2025-04-25 11:32:12 +02:00
Antonio Scandurra 582a247922 Merge remote-tracking branch 'origin/main' into streaming-edits 2025-04-25 09:41:40 +02:00
Antonio Scandurra c2881a4537 Add a new eval for edits 2025-04-25 09:41:33 +02:00
23 changed files with 2508 additions and 1024 deletions

Cargo.lock (generated)
View File

@@ -710,24 +710,31 @@ dependencies = [
"collections",
"component",
"editor",
"fs",
"futures 0.3.31",
"gpui",
"gpui_tokio",
"handlebars 4.5.0",
"html_to_markdown",
"http_client",
"indoc",
"itertools 0.14.0",
"language",
"language_model",
"language_models",
"linkme",
"open",
"pretty_assertions",
"project",
"rand 0.8.5",
"regex",
"reqwest_client",
"rust-embed",
"schemars",
"serde",
"serde_json",
"settings",
"smallvec",
"tree-sitter-rust",
"ui",
"unindent",
@@ -4982,6 +4989,7 @@ dependencies = [
"languages",
"node_runtime",
"paths",
"pretty_assertions",
"project",
"prompt_store",
"regex",
@@ -6346,6 +6354,7 @@ dependencies = [
"log",
"pest",
"pest_derive",
"rust-embed",
"serde",
"serde_json",
"thiserror 1.0.69",

View File

@@ -21,6 +21,7 @@ component.workspace = true
editor.workspace = true
futures.workspace = true
gpui.workspace = true
handlebars = { workspace = true, features = ["rust-embed"] }
html_to_markdown.workspace = true
http_client.workspace = true
indoc.workspace = true
@@ -31,9 +32,11 @@ linkme.workspace = true
open.workspace = true
project.workspace = true
regex.workspace = true
rust-embed.workspace = true
schemars.workspace = true
serde.workspace = true
serde_json.workspace = true
smallvec.workspace = true
ui.workspace = true
util.workspace = true
web_search.workspace = true
@@ -47,10 +50,14 @@ client = { workspace = true, features = ["test-support"] }
clock = { workspace = true, features = ["test-support"] }
collections = { workspace = true, features = ["test-support"] }
gpui = { workspace = true, features = ["test-support"] }
gpui_tokio.workspace = true
fs = { workspace = true, features = ["test-support"] }
language = { workspace = true, features = ["test-support"] }
language_models.workspace = true
project = { workspace = true, features = ["test-support"] }
rand.workspace = true
pretty_assertions.workspace = true
reqwest_client.workspace = true
settings = { workspace = true, features = ["test-support"] }
tree-sitter-rust.workspace = true
workspace = { workspace = true, features = ["test-support"] }

View File

@@ -7,6 +7,7 @@ mod create_directory_tool;
mod create_file_tool;
mod delete_path_tool;
mod diagnostics_tool;
mod edit_agent;
mod edit_file_tool;
mod fetch_tool;
mod find_path_tool;
@@ -17,9 +18,9 @@ mod now_tool;
mod open_tool;
mod read_file_tool;
mod rename_tool;
mod replace;
mod schema;
mod symbol_info_tool;
mod templates;
mod terminal_tool;
mod thinking_tool;
mod ui;
@@ -35,6 +36,8 @@ use language_model::LanguageModelRegistry;
use move_path_tool::MovePathTool;
use web_search_tool::WebSearchTool;
pub(crate) use templates::*;
use crate::batch_tool::BatchTool;
use crate::code_action_tool::CodeActionTool;
use crate::code_symbols_tool::CodeSymbolsTool;

View File

@@ -0,0 +1,526 @@
mod edit_parser;
use crate::{Template, Templates};
use anyhow::{Result, anyhow};
use assistant_tool::ActionLog;
use edit_parser::EditParser;
use futures::{Stream, StreamExt, stream};
use gpui::{AsyncApp, Entity};
use language::{Anchor, Bias, Buffer, BufferSnapshot};
use language_model::{
LanguageModel, LanguageModelRequest, LanguageModelRequestMessage, MessageContent, Role,
};
use serde::Serialize;
use smallvec::SmallVec;
use std::{ops::Range, path::PathBuf, sync::Arc};
#[derive(Serialize)]
pub struct EditAgentTemplate {
path: Option<PathBuf>,
file_content: String,
instructions: String,
}
impl Template for EditAgentTemplate {
const TEMPLATE_NAME: &'static str = "edit_agent.hbs";
}
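// The prompt template is a Handlebars file (see the new `templates` module);
// judging by the Cargo.toml changes it is embedded at compile time via
// `rust-embed` and rendered with the file's path, contents, and instructions.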
pub struct EditAgent {
model: Arc<dyn LanguageModel>,
action_log: Entity<ActionLog>,
templates: Arc<Templates>,
}
impl EditAgent {
pub fn new(
model: Arc<dyn LanguageModel>,
action_log: Entity<ActionLog>,
templates: Arc<Templates>,
) -> Self {
EditAgent {
model,
action_log,
templates,
}
}
pub async fn edit(
&self,
buffer: Entity<Buffer>,
instructions: String,
cx: &mut AsyncApp,
) -> Result<()> {
let edits = self.stream_edits(buffer.clone(), instructions, cx).await?;
self.apply_edits(buffer, edits, cx).await?;
Ok(())
}
async fn apply_edits(
&self,
buffer: Entity<Buffer>,
edits: impl Stream<Item = Result<(Range<Anchor>, String)>>,
cx: &mut AsyncApp,
) -> Result<()> {
// todo!("group all edits into one transaction")
// todo!("add tests for this")
// Ensure the buffer is tracked by the action log.
self.action_log
.update(cx, |log, cx| log.track_buffer(buffer.clone(), cx))?;
futures::pin_mut!(edits);
while let Some(edit) = edits.next().await {
let (range, content) = edit?;
// Edit the buffer and report the edit as part of the same effect cycle, otherwise
// the edit will be reported as if the user made it.
cx.update(|cx| {
buffer.update(cx, |buffer, cx| buffer.edit([(range, content)], None, cx));
self.action_log
.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx))
})?;
}
Ok(())
}
async fn stream_edits(
&self,
buffer: Entity<Buffer>,
instructions: String,
cx: &mut AsyncApp,
) -> Result<impl use<> + Stream<Item = Result<(Range<Anchor>, String)>>> {
println!("{}\n\n", instructions);
let snapshot = buffer.read_with(cx, |buffer, _| buffer.snapshot())?;
let path = cx.update(|cx| snapshot.resolve_file_path(cx, true))?;
// todo!("move to background")
let prompt = EditAgentTemplate {
path,
file_content: snapshot.text(),
instructions,
}
.render(&self.templates)?;
let request = LanguageModelRequest {
messages: vec![LanguageModelRequestMessage {
role: Role::User,
content: vec![MessageContent::Text(prompt)],
cache: false,
}],
..Default::default()
};
let mut parser = EditParser::new();
let stream = self.model.stream_completion_text(request, cx).await?.stream;
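// Each streamed chunk can complete zero or more edits; resolve every
// completed edit's `old_text` against the snapshot and emit it immediately,
// so edits apply as they arrive rather than after the full response.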
Ok(stream.flat_map(move |chunk| {
let mut edits = SmallVec::new();
let mut error = None;
let snapshot = snapshot.clone();
match chunk {
Ok(chunk) => edits = parser.push(&chunk),
Err(err) => error = Some(Err(anyhow!(err))),
}
stream::iter(
edits
.into_iter()
.map(move |edit| {
dbg!(&edit);
let range = Self::resolve_location(&snapshot, &edit.old_text);
Ok((range, edit.new_text))
})
.chain(error),
)
}))
}
fn resolve_location(buffer: &BufferSnapshot, search_query: &str) -> Range<Anchor> {
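// Find the closest match for `search_query` in the buffer using a dynamic
// program over bytes, similar to sequence alignment: skipping a query byte
// (deletion) is expensive, skipping a buffer byte (insertion) is cheaper, and
// whitespace mismatches are nearly free, so the search tolerates indentation
// and formatting drift in the model's quoted `old_text`.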
const INSERTION_COST: u32 = 3;
const DELETION_COST: u32 = 10;
const WHITESPACE_INSERTION_COST: u32 = 1;
const WHITESPACE_DELETION_COST: u32 = 1;
let buffer_len = buffer.len();
let query_len = search_query.len();
let mut matrix = SearchMatrix::new(query_len + 1, buffer_len + 1);
let mut leading_deletion_cost = 0_u32;
for (row, query_byte) in search_query.bytes().enumerate() {
let deletion_cost = if query_byte.is_ascii_whitespace() {
WHITESPACE_DELETION_COST
} else {
DELETION_COST
};
leading_deletion_cost = leading_deletion_cost.saturating_add(deletion_cost);
matrix.set(
row + 1,
0,
SearchState::new(leading_deletion_cost, SearchDirection::Diagonal),
);
for (col, buffer_byte) in buffer.bytes_in_range(0..buffer.len()).flatten().enumerate() {
let insertion_cost = if buffer_byte.is_ascii_whitespace() {
WHITESPACE_INSERTION_COST
} else {
INSERTION_COST
};
let up = SearchState::new(
matrix.get(row, col + 1).cost.saturating_add(deletion_cost),
SearchDirection::Up,
);
let left = SearchState::new(
matrix.get(row + 1, col).cost.saturating_add(insertion_cost),
SearchDirection::Left,
);
let diagonal = SearchState::new(
if query_byte == *buffer_byte {
matrix.get(row, col).cost
} else {
matrix
.get(row, col)
.cost
.saturating_add(deletion_cost + insertion_cost)
},
SearchDirection::Diagonal,
);
matrix.set(row + 1, col + 1, up.min(left).min(diagonal));
}
}
// Traceback to find the best match
let mut best_buffer_end = buffer_len;
let mut best_cost = u32::MAX;
for col in 1..=buffer_len {
let cost = matrix.get(query_len, col).cost;
if cost < best_cost {
best_cost = cost;
best_buffer_end = col;
}
}
let mut query_ix = query_len;
let mut buffer_ix = best_buffer_end;
while query_ix > 0 && buffer_ix > 0 {
let current = matrix.get(query_ix, buffer_ix);
match current.direction {
SearchDirection::Diagonal => {
query_ix -= 1;
buffer_ix -= 1;
}
SearchDirection::Up => {
query_ix -= 1;
}
SearchDirection::Left => {
buffer_ix -= 1;
}
}
}
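// Snap the matched range outward to whole lines, so the edit replaces
// complete lines rather than splitting one mid-line.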
let mut start = buffer.offset_to_point(buffer.clip_offset(buffer_ix, Bias::Left));
start.column = 0;
let mut end = buffer.offset_to_point(buffer.clip_offset(best_buffer_end, Bias::Right));
if end.column > 0 {
end.column = buffer.line_len(end.row);
}
buffer.anchor_after(start)..buffer.anchor_before(end)
}
}
#[derive(Clone, Copy, Debug, PartialEq, Eq, PartialOrd, Ord)]
enum SearchDirection {
Up,
Left,
Diagonal,
}
#[derive(Copy, Clone, PartialEq, Eq, PartialOrd, Ord)]
struct SearchState {
cost: u32,
direction: SearchDirection,
}
impl SearchState {
fn new(cost: u32, direction: SearchDirection) -> Self {
Self { cost, direction }
}
}
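// A row-major (query bytes + 1) x (buffer bytes + 1) matrix of alignment
// states, flattened into a single Vec.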
struct SearchMatrix {
cols: usize,
data: Vec<SearchState>,
}
impl SearchMatrix {
fn new(rows: usize, cols: usize) -> Self {
SearchMatrix {
cols,
data: vec![SearchState::new(0, SearchDirection::Diagonal); rows * cols],
}
}
fn get(&self, row: usize, col: usize) -> SearchState {
self.data[row * self.cols + col]
}
fn set(&mut self, row: usize, col: usize, cost: SearchState) {
self.data[row * self.cols + col] = cost;
}
}
#[cfg(test)]
mod tests {
use super::*;
use client::{Client, UserStore};
use collections::HashSet;
use fs::FakeFs;
use gpui::{AppContext, TestAppContext};
use indoc::indoc;
use language_model::LanguageModelRegistry;
use project::Project;
use rand::prelude::*;
use reqwest_client::ReqwestClient;
use serde_json::json;
use std::{fmt::Write as _, io::Write as _, path::Path, sync::mpsc};
use util::path;
#[test]
fn test_delete_run_git_blame() {
eval(
100,
0.9,
Eval {
input_path: "root/blame.rs".into(),
input_content: include_str!("fixtures/delete_run_git_blame/before.rs").into(),
instructions: indoc! {r#"
Let's delete the `run_git_blame` function while keeping all other code intact:
// ... existing code ...
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
// ... existing code ...
"#}
.into(),
expected_output: include_str!("fixtures/delete_run_git_blame/after.rs").into(),
},
);
}
#[test]
fn test_extract_handle_command_output() {
eval(
100,
0.9,
Eval {
input_path: "root/blame.rs".into(),
input_content: include_str!("fixtures/extract_handle_command_output/before.rs").into(),
instructions: indoc! {r#"
Extract `handle_command_output` method from `run_git_blame`.
// ... existing code ...
async fn run_git_blame(
git_binary: &Path,
working_directory: &Path,
path: &Path,
contents: &Rope,
) -> Result<String> {
let mut child = util::command::new_smol_command(git_binary)
.current_dir(working_directory)
.arg("blame")
.arg("--incremental")
.arg("--contents")
.arg("-")
.arg(path.as_os_str())
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| anyhow!("Failed to start git blame process: {}", e))?;
let stdin = child
.stdin
.as_mut()
.context("failed to get pipe to stdin of git blame command")?;
for chunk in contents.chunks() {
stdin.write_all(chunk.as_bytes()).await?;
}
stdin.flush().await?;
let output = child
.output()
.await
.map_err(|e| anyhow!("Failed to read git blame output: {}", e))?;
handle_command_output(output)
}
fn handle_command_output(output: std::process::Output) -> Result<String> {
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let trimmed = stderr.trim();
if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) {
return Ok(String::new());
}
return Err(anyhow!("git blame process failed: {}", stderr));
}
Ok(String::from_utf8(output.stdout)?)
}
// ... existing code ...
"#}
.into(),
expected_output: include_str!("fixtures/extract_handle_command_output/after.rs").into()
},
);
}
#[derive(Clone)]
struct Eval {
input_path: PathBuf,
input_content: String,
instructions: String,
expected_output: String,
}
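/// Runs `iterations` independent evaluations in parallel and asserts that at
/// least `expected_pass_ratio` of them reproduce `expected_output` exactly.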
fn eval(iterations: usize, expected_pass_ratio: f32, eval: Eval) {
let executor = gpui::background_executor();
let (tx, rx) = mpsc::channel();
for _ in 0..iterations {
let eval = eval.clone();
let tx = tx.clone();
executor
.spawn(async move {
let dispatcher = gpui::TestDispatcher::new(StdRng::from_entropy());
let mut cx = TestAppContext::build(dispatcher, None);
let output = cx.executor().block_test(async {
let test = agent_test(&mut cx).await;
apply_edits(
eval.input_path,
eval.input_content,
eval.instructions,
&test,
&mut cx,
)
.await
});
tx.send(output).unwrap();
})
.detach();
}
drop(tx);
let mut evaluated_count = 0;
report_progress(evaluated_count, iterations);
let mut failed_count = 0;
let mut failed_message = String::new();
let mut failed_outputs = HashSet::default();
while let Ok(output) = rx.recv() {
if output != eval.expected_output {
failed_count += 1;
if failed_outputs.insert(output.clone()) {
writeln!(
failed_message,
"=======\n{}\n=======",
pretty_assertions::StrComparison::new(&output, &eval.expected_output)
)
.unwrap();
}
}
evaluated_count += 1;
report_progress(evaluated_count, iterations);
}
let actual_pass_ratio = (iterations - failed_count) as f32 / iterations as f32;
println!("Actual pass ratio: {}\n", actual_pass_ratio);
assert!(
actual_pass_ratio >= expected_pass_ratio,
"Expected pass ratio: {}\nActual pass ratio: {}\nFailures: {}",
expected_pass_ratio,
actual_pass_ratio,
failed_message
);
}
fn report_progress(evaluated_count: usize, iterations: usize) {
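// `\r` returns the cursor to column 0 and `\x1b[K` clears the line, so the
// counter redraws in place on every update.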
print!("\r\x1b[KEvaluated {}/{}", evaluated_count, iterations);
std::io::stdout().flush().unwrap();
}
async fn apply_edits(
path: impl AsRef<Path>,
content: impl Into<Arc<str>>,
instructions: impl Into<String>,
test: &EditAgentTest,
cx: &mut TestAppContext,
) -> String {
let path = test
.project
.read_with(cx, |project, cx| project.find_project_path(path, cx))
.unwrap();
let buffer = test
.project
.update(cx, |project, cx| project.open_buffer(path, cx))
.await
.unwrap();
buffer.update(cx, |buffer, cx| buffer.set_text(content, cx));
test.agent
.edit(buffer.clone(), instructions.into(), &mut cx.to_async())
.await
.unwrap();
buffer.update(cx, |buffer, _cx| buffer.text())
}
struct EditAgentTest {
agent: EditAgent,
project: Entity<Project>,
}
async fn agent_test(cx: &mut TestAppContext) -> EditAgentTest {
cx.executor().allow_parking();
cx.update(settings::init);
cx.update(Project::init_settings);
cx.update(language::init);
cx.update(gpui_tokio::init);
cx.update(client::init_settings);
let fs = FakeFs::new(cx.executor().clone());
fs.insert_tree("/root", json!({})).await;
let project = Project::test(fs.clone(), [path!("/root").as_ref()], cx).await;
let model = cx
.update(|cx| {
let http_client = ReqwestClient::user_agent("agent tests").unwrap();
cx.set_http_client(Arc::new(http_client));
let client = Client::production(cx);
let user_store = cx.new(|cx| UserStore::new(client.clone(), cx));
language_model::init(client.clone(), cx);
language_models::init(user_store.clone(), client.clone(), fs.clone(), cx);
let models = LanguageModelRegistry::read_global(cx);
let model = models
.available_models(cx)
.find(|model| model.id().0 == "gemini-2.0-flash")
.unwrap();
let provider = models.provider(&model.provider_id()).unwrap();
let authenticated = provider.authenticate(cx);
cx.spawn(async move |_| {
authenticated.await.unwrap();
model
})
})
.await;
let action_log = cx.new(|_| ActionLog::new(project.clone()));
EditAgentTest {
agent: EditAgent::new(model, action_log, Templates::new()),
project,
}
}
}

View File

@@ -0,0 +1,246 @@
use smallvec::SmallVec;
use std::mem;
#[derive(Debug, Eq, PartialEq)]
pub struct Edit {
pub old_text: String,
pub new_text: String,
}
#[derive(Debug)]
pub struct EditParser {
state: EditParserState,
buffer: String,
}
#[derive(Debug, PartialEq)]
enum EditParserState {
Pending,
WithinOldText,
AfterOldText { old_text: String },
WithinNewText { old_text: String },
}
impl EditParser {
pub fn new() -> Self {
EditParser {
state: EditParserState::Pending,
buffer: String::new(),
}
}
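/// Feeds a chunk of streamed model output into the parser and returns any
/// edits completed by this chunk. An edit is a pair of
/// `<old_text>...</old_text>` and `<new_text>...</new_text>` blocks; text
/// that straddles a chunk boundary is buffered until the closing tag arrives.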
pub fn push(&mut self, chunk: &str) -> SmallVec<[Edit; 1]> {
self.buffer.push_str(chunk);
let mut edits = SmallVec::new();
loop {
match &mut self.state {
EditParserState::Pending => {
if let Some(start) = self.buffer.find("<old_text>") {
self.buffer.drain(..start + "<old_text>".len());
self.state = EditParserState::WithinOldText;
} else {
break;
}
}
EditParserState::WithinOldText => {
if let Some(end) = self.buffer.find("</old_text>") {
let mut start = 0;
if self.buffer.starts_with('\n') {
start = 1;
}
let mut old_text = self.buffer[start..end].to_string();
if old_text.ends_with('\n') {
old_text.pop();
}
self.buffer.drain(..end + "</old_text>".len());
self.state = EditParserState::AfterOldText { old_text };
} else {
break;
}
}
EditParserState::AfterOldText { old_text } => {
if let Some(start) = self.buffer.find("<new_text>") {
self.buffer.drain(..start + "<new_text>".len());
self.state = EditParserState::WithinNewText {
old_text: mem::take(old_text),
};
} else {
break;
}
}
EditParserState::WithinNewText { old_text } => {
if let Some(end) = self.buffer.find("</new_text>") {
let mut start = 0;
if self.buffer.starts_with('\n') {
start = 1;
}
let mut new_text = self.buffer[start..end].to_string();
if new_text.ends_with('\n') {
new_text.pop();
}
edits.push(Edit {
old_text: mem::take(old_text),
new_text,
});
self.buffer.drain(..end + "</new_text>".len());
self.state = EditParserState::Pending;
} else {
break;
}
}
}
}
edits
}
}
#[cfg(test)]
mod tests {
use super::*;
use indoc::indoc;
use rand::prelude::*;
use std::cmp;
#[gpui::test(iterations = 1000)]
fn test_single_edit(mut rng: StdRng) {
assert_eq!(
parse(
"<old_text>original</old_text><new_text>updated</new_text>",
&mut rng
),
vec![Edit {
old_text: "original".to_string(),
new_text: "updated".to_string(),
}]
)
}
#[gpui::test(iterations = 1000)]
fn test_multiple_edits(mut rng: StdRng) {
assert_eq!(
parse(
indoc! {"
<old_text>
first old
</old_text><new_text>first new</new_text>
<old_text>second old</old_text><new_text>
second new
</new_text>
"},
&mut rng
),
vec![
Edit {
old_text: "first old".to_string(),
new_text: "first new".to_string(),
},
Edit {
old_text: "second old".to_string(),
new_text: "second new".to_string(),
},
]
);
}
#[gpui::test(iterations = 1000)]
fn test_edits_with_extra_text(mut rng: StdRng) {
assert_eq!(
parse(
indoc! {"
ignore this <old_text>
content</old_text>extra stuff<new_text>updated content</new_text>trailing data
more text <old_text>second item
</old_text>middle text<new_text>modified second item</new_text>end
<old_text>third case</old_text><new_text>improved third case</new_text> with trailing text
"},
&mut rng
),
vec![
Edit {
old_text: "content".to_string(),
new_text: "updated content".to_string(),
},
Edit {
old_text: "second item".to_string(),
new_text: "modified second item".to_string(),
},
Edit {
old_text: "third case".to_string(),
new_text: "improved third case".to_string(),
},
]
);
}
#[gpui::test(iterations = 1000)]
fn test_nested_tags(mut rng: StdRng) {
assert_eq!(
parse(
"<old_text>code with <tag>nested</tag> elements</old_text><new_text>new <code>content</code></new_text>",
&mut rng
),
vec![Edit {
old_text: "code with <tag>nested</tag> elements".to_string(),
new_text: "new <code>content</code>".to_string(),
}]
);
}
#[gpui::test(iterations = 1000)]
fn test_empty_old_and_new_text(mut rng: StdRng) {
assert_eq!(
parse("<old_text></old_text><new_text></new_text>", &mut rng),
vec![Edit {
old_text: "".to_string(),
new_text: "".to_string(),
}]
);
}
#[gpui::test(iterations = 1000)]
fn test_with_special_characters(mut rng: StdRng) {
assert_eq!(
parse(
"<old_text>function(x) { return x * 2; }</old_text><new_text>function(x) { return x ** 2; }</new_text>",
&mut rng
),
vec![Edit {
old_text: "function(x) { return x * 2; }".to_string(),
new_text: "function(x) { return x ** 2; }".to_string(),
}]
);
}
#[gpui::test(iterations = 100)]
fn test_multiline_content(mut rng: StdRng) {
assert_eq!(
parse(
"<old_text>line1\nline2\nline3</old_text><new_text>line1\nmodified line2\nline3</new_text>",
&mut rng
),
vec![Edit {
old_text: "line1\nline2\nline3".to_string(),
new_text: "line1\nmodified line2\nline3".to_string(),
}]
);
}
fn parse(input: &str, rng: &mut StdRng) -> Vec<Edit> {
let mut parser = EditParser::new();
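// Feed the input in randomly sized chunks to verify the parser is
// insensitive to how the stream happens to be split.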
let chunk_count = rng.gen_range(1..=cmp::min(input.len(), 50));
let mut chunk_indices = (0..input.len()).choose_multiple(rng, chunk_count);
chunk_indices.sort();
let mut edits = Vec::new();
let mut last_ix = 0;
for chunk_ix in chunk_indices {
edits.extend(parser.push(&input[last_ix..chunk_ix]));
last_ix = chunk_ix;
}
edits.extend(parser.push(&input[last_ix..]));
edits
}
}

View File

@@ -1,8 +1,5 @@
use crate::{
replace::{replace_exact, replace_with_flexible_indent},
schema::json_schema_for,
};
use anyhow::{Context as _, Result, anyhow};
use crate::{Templates, edit_agent::EditAgent, schema::json_schema_for};
use anyhow::{Result, anyhow};
use assistant_tool::{ActionLog, AnyToolCard, Tool, ToolCard, ToolResult, ToolUseStatus};
use buffer_diff::{BufferDiff, BufferDiffSnapshot};
use editor::{Editor, EditorMode, MultiBuffer, PathKey};
@@ -12,7 +9,9 @@ use gpui::{
use language::{
Anchor, Buffer, Capability, LanguageRegistry, LineEnding, OffsetRangeExt, Rope, TextBuffer,
};
use language_model::{LanguageModelRequestMessage, LanguageModelToolSchemaFormat};
use language_model::{
LanguageModelRegistry, LanguageModelRequestMessage, LanguageModelToolSchemaFormat,
};
use project::Project;
use schemars::JsonSchema;
use serde::{Deserialize, Serialize};
@@ -26,7 +25,7 @@ use workspace::Workspace;
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
pub struct EditFileToolInput {
/// A user-friendly markdown description of what's being replaced. This will be shown in the UI.
/// A user-friendly markdown description of the edit. This will be shown in the UI.
///
/// <example>Fix API endpoint URLs</example>
/// <example>Update copyright year in `page_footer`</example>
@@ -56,11 +55,35 @@ pub struct EditFileToolInput {
/// </example>
pub path: PathBuf,
/// The text to replace.
pub old_string: String,
/// The text to replace it with.
pub new_string: String,
/// Edit instructions that will be interpreted by a less intelligent model,
/// which will quickly apply the edits. You should make it clear what the
/// edits are, while also minimizing the unchanged code you write. The model
/// does not have access to this conversation, so you must make sure the
/// instructions are self-contained and do not rely on external context.
///
/// Insert `// ... existing code ...` comments in your output to represent
/// unchanged code ABOVE, BELOW, and IN BETWEEN edited lines.
///
/// Bias towards repeating as few lines of the original file as possible to
/// convey the change. However, each edit should contain sufficient context
/// of unchanged lines to resolve ambiguity. When you want to delete a piece
/// of code, indicate a few lines above and below the code you want to
/// delete (surrounded by `// ... existing code ...`).
///
/// Never forget to include `// ... existing code ...` comments to represent
/// unchanged lines, otherwise the small model may not understand the
/// context of your edit and will delete important code!
///
/// <your_output>
/// // ... existing code ...
/// FIRST_EDIT
/// // ... existing code ...
/// SECOND_EDIT
/// // ... existing code ...
/// THIRD_EDIT
/// // ... existing code ...
/// </your_output>
pub edit_instructions: String,
}
#[derive(Debug, Serialize, Deserialize, JsonSchema)]
@@ -69,10 +92,6 @@ struct PartialInput {
path: String,
#[serde(default)]
display_description: String,
#[serde(default)]
old_string: String,
#[serde(default)]
new_string: String,
}
pub struct EditFileTool;
@@ -137,6 +156,23 @@ impl Tool for EditFileTool {
Err(err) => return Task::ready(Err(anyhow!(err))).into(),
};
let Some(project_path) = project.read(cx).find_project_path(&input.path, cx) else {
return Task::ready(Err(anyhow!(
"Path {} not found in project",
input.path.display()
)))
.into();
};
let Some(worktree) = project
.read(cx)
.worktree_for_id(project_path.worktree_id, cx)
else {
return Task::ready(Err(anyhow!("Worktree not found for project path"))).into();
};
let exists = worktree.update(cx, |worktree, cx| {
worktree.file_exists(&project_path.path, cx)
});
let card = window.and_then(|window| {
window
.update(cx, |_, window, cx| {
@@ -148,12 +184,22 @@ impl Tool for EditFileTool {
});
let card_clone = card.clone();
// todo!("read model from settings...")
let models = LanguageModelRegistry::read_global(cx);
let model = models
.available_models(cx)
.find(|model| model.id().0 == "gemini-2.0-flash")
.unwrap();
let provider = models.provider(&model.provider_id()).unwrap();
let authenticated = provider.authenticate(cx);
// todo!("reuse templates")
let edit_agent = EditAgent::new(model, action_log, Templates::new());
let task = cx.spawn(async move |cx: &mut AsyncApp| {
let project_path = project.read_with(cx, |project, cx| {
project
.find_project_path(&input.path, cx)
.context("Path not found in project")
})??;
authenticated.await?;
if !exists.await? {
return Err(anyhow!("{} not found", input.path.display()));
}
let buffer = project
.update(cx, |project, cx| {
@@ -161,90 +207,27 @@ impl Tool for EditFileTool {
})?
.await?;
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
if input.old_string.is_empty() {
return Err(anyhow!(
"`old_string` can't be empty, use another tool if you want to create a file."
));
}
if input.old_string == input.new_string {
return Err(anyhow!(
"The `old_string` and `new_string` are identical, so no changes would be made."
));
}
let result = cx
.background_spawn(async move {
// Try to match exactly
let diff = replace_exact(&input.old_string, &input.new_string, &snapshot)
.await
// If that fails, try being flexible about indentation
.or_else(|| {
replace_with_flexible_indent(
&input.old_string,
&input.new_string,
&snapshot,
)
})?;
if diff.edits.is_empty() {
return None;
}
let old_text = snapshot.text();
Some((old_text, diff))
})
.await;
let Some((old_text, diff)) = result else {
let err = buffer.read_with(cx, |buffer, _cx| {
let file_exists = buffer
.file()
.map_or(false, |file| file.disk_state().exists());
if !file_exists {
anyhow!("{} does not exist", input.path.display())
} else if buffer.is_empty() {
anyhow!(
"{} is empty, so the provided `old_string` wasn't found.",
input.path.display()
)
} else {
anyhow!("Failed to match the provided `old_string`")
}
})?;
return Err(err);
};
let snapshot = cx.update(|cx| {
action_log.update(cx, |log, cx| log.track_buffer(buffer.clone(), cx));
let snapshot = buffer.update(cx, |buffer, cx| {
buffer.finalize_last_transaction();
buffer.apply_diff(diff, cx);
buffer.finalize_last_transaction();
buffer.snapshot()
});
action_log.update(cx, |log, cx| log.buffer_edited(buffer.clone(), cx));
snapshot
})?;
let old_snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
edit_agent
.edit(buffer.clone(), input.edit_instructions.clone(), cx)
.await?;
let new_snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot())?;
project
.update(cx, |project, cx| project.save_buffer(buffer, cx))?
.await?;
let new_text = snapshot.text();
let diff_str = cx
.background_spawn({
let old_text = old_text.clone();
let new_text = new_text.clone();
async move { language::unified_diff(&old_text, &new_text) }
})
.await;
let old_text = cx.background_spawn({
let old_snapshot = old_snapshot.clone();
async move { old_snapshot.text() }
});
let new_text = cx.background_spawn({
let new_snapshot = new_snapshot.clone();
async move { new_snapshot.text() }
});
let diff = cx.background_spawn(async move {
language::unified_diff(&old_snapshot.text(), &new_snapshot.text())
});
let (old_text, new_text, diff) = futures::join!(old_text, new_text, diff);
if let Some(card) = card_clone {
card.update(cx, |card, cx| {
@@ -256,7 +239,7 @@ impl Tool for EditFileTool {
Ok(format!(
"Edited {}:\n\n```diff\n{}\n```",
input.path.display(),
diff_str
diff
))
});
@@ -620,6 +603,7 @@ async fn build_buffer_diff(
})
}
// todo!("add unit tests for failure modes of edit, like file not found, etc.")
#[cfg(test)]
mod tests {
use super::*;

View File

@@ -7,39 +7,4 @@ Before using this tool:
2. Verify the directory path is correct (only applicable when creating new files):
- Use the `list_directory` tool to verify the parent directory exists and is the correct location
To make a file edit, provide the following:
1. path: The full path to the file you wish to modify in the project. This path must include the root directory in the project.
2. old_string: The text to replace (must be unique within the file, and must match the file contents exactly, including all whitespace and indentation)
3. new_string: The edited text, which will replace the old_string in the file.
The tool will replace ONE occurrence of old_string with new_string in the specified file.
CRITICAL REQUIREMENTS FOR USING THIS TOOL:
1. UNIQUENESS: The old_string MUST uniquely identify the specific instance you want to change. This means:
- Include AT LEAST 3-5 lines of context BEFORE the change point
- Include AT LEAST 3-5 lines of context AFTER the change point
- Include all whitespace, indentation, and surrounding code exactly as it appears in the file
2. SINGLE INSTANCE: This tool can only change ONE instance at a time. If you need to change multiple instances:
- Make separate calls to this tool for each instance
- Each call must uniquely identify its specific instance using extensive context
3. VERIFICATION: Before using this tool:
- Check how many instances of the target text exist in the file
- If multiple instances exist, gather enough context to uniquely identify each one
- Plan separate tool calls for each instance
WARNING: If you do not follow these requirements:
- The tool will fail if old_string matches multiple locations
- The tool will fail if old_string doesn't match exactly (including whitespace)
- You may change the wrong instance if you don't include enough context
When making edits:
- Ensure the edit results in idiomatic, correct code
- Do not leave the code in a broken state
- Always use fully-qualified project paths (starting with the name of one of the project's root directories)
If you want to create a new file, use the `create_file` tool instead of this tool. Don't pass an empty `old_string`.
Remember: when making multiple file edits in a row to the same file, you should prefer to send all edits in a single message with multiple calls to this tool, rather than multiple messages with a single call each.
Group coherent edits together and include all of them in a single call to this tool. Add the full context needed for a small model to understand the edits.

View File

@@ -0,0 +1,328 @@
use crate::commit::get_messages;
use crate::{GitRemote, Oid};
use anyhow::{Context as _, Result, anyhow};
use collections::{HashMap, HashSet};
use futures::AsyncWriteExt;
use gpui::SharedString;
use serde::{Deserialize, Serialize};
use std::process::Stdio;
use std::{ops::Range, path::Path};
use text::Rope;
use time::OffsetDateTime;
use time::UtcOffset;
use time::macros::format_description;
pub use git2 as libgit;
#[derive(Debug, Clone, Default)]
pub struct Blame {
pub entries: Vec<BlameEntry>,
pub messages: HashMap<Oid, String>,
pub remote_url: Option<String>,
}
#[derive(Clone, Debug, Default)]
pub struct ParsedCommitMessage {
pub message: SharedString,
pub permalink: Option<url::Url>,
pub pull_request: Option<crate::hosting_provider::PullRequest>,
pub remote: Option<GitRemote>,
}
impl Blame {
pub async fn for_path(
git_binary: &Path,
working_directory: &Path,
path: &Path,
content: &Rope,
remote_url: Option<String>,
) -> Result<Self> {
let output = run_git_blame(git_binary, working_directory, path, content).await?;
let mut entries = parse_git_blame(&output)?;
entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start));
let mut unique_shas = HashSet::default();
for entry in entries.iter_mut() {
unique_shas.insert(entry.sha);
}
let shas = unique_shas.into_iter().collect::<Vec<_>>();
let messages = get_messages(working_directory, &shas)
.await
.context("failed to get commit messages")?;
Ok(Self {
entries,
messages,
remote_url,
})
}
}
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
pub sha: Oid,
pub range: Range<u32>,
pub original_line_number: u32,
pub author: Option<String>,
pub author_mail: Option<String>,
pub author_time: Option<i64>,
pub author_tz: Option<String>,
pub committer_name: Option<String>,
pub committer_email: Option<String>,
pub committer_time: Option<i64>,
pub committer_tz: Option<String>,
pub summary: Option<String>,
pub previous: Option<String>,
pub filename: String,
}
impl BlameEntry {
// Returns a BlameEntry by parsing the first line of a `git blame --incremental`
// entry. The line MUST have this format:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
fn new_from_blame_line(line: &str) -> Result<BlameEntry> {
let mut parts = line.split_whitespace();
let sha = parts
.next()
.and_then(|line| line.parse::<Oid>().ok())
.ok_or_else(|| anyhow!("failed to parse sha"))?;
let original_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse original line number"))?;
let final_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let line_count = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let start_line = final_line_number.saturating_sub(1);
let end_line = start_line + line_count;
let range = start_line..end_line;
Ok(Self {
sha,
range,
original_line_number,
..Default::default()
})
}
pub fn author_offset_date_time(&self) -> Result<time::OffsetDateTime> {
if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) {
let format = format_description!("[offset_hour][offset_minute]");
let offset = UtcOffset::parse(author_tz, &format)?;
let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?;
Ok(date_time_utc.to_offset(offset))
} else {
// Directly return current time in UTC if there's no committer time or timezone
Ok(time::OffsetDateTime::now_utc())
}
}
}
// parse_git_blame parses the output of `git blame --incremental`, which returns
// all the blame-entries for a given path incrementally, as it finds them.
//
// Each entry *always* starts with:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
//
// Each entry *always* ends with:
//
// filename <whitespace-quoted-filename-goes-here>
//
// Line numbers are 1-indexed.
//
// A `git blame --incremental` entry looks like this:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
// author Joe Schmoe
// author-mail <joe.schmoe@example.com>
// author-time 1709741400
// author-tz +0100
// committer Joe Schmoe
// committer-mail <joe.schmoe@example.com>
// committer-time 1709741400
// committer-tz +0100
// summary Joe's cool commit
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// If the entry has the same SHA as an entry that was already printed then no
// signature information is printed:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html
fn parse_git_blame(output: &str) -> Result<Vec<BlameEntry>> {
let mut entries: Vec<BlameEntry> = Vec::new();
let mut index: HashMap<Oid, usize> = HashMap::default();
let mut current_entry: Option<BlameEntry> = None;
for line in output.lines() {
let mut done = false;
match &mut current_entry {
None => {
let mut new_entry = BlameEntry::new_from_blame_line(line)?;
if let Some(existing_entry) = index
.get(&new_entry.sha)
.and_then(|slot| entries.get(*slot))
{
new_entry.author.clone_from(&existing_entry.author);
new_entry
.author_mail
.clone_from(&existing_entry.author_mail);
new_entry.author_time = existing_entry.author_time;
new_entry.author_tz.clone_from(&existing_entry.author_tz);
new_entry
.committer_name
.clone_from(&existing_entry.committer_name);
new_entry
.committer_email
.clone_from(&existing_entry.committer_email);
new_entry.committer_time = existing_entry.committer_time;
new_entry
.committer_tz
.clone_from(&existing_entry.committer_tz);
new_entry.summary.clone_from(&existing_entry.summary);
}
current_entry.replace(new_entry);
}
Some(entry) => {
let Some((key, value)) = line.split_once(' ') else {
continue;
};
let is_committed = !entry.sha.is_zero();
match key {
"filename" => {
entry.filename = value.into();
done = true;
}
"previous" => entry.previous = Some(value.into()),
"summary" if is_committed => entry.summary = Some(value.into()),
"author" if is_committed => entry.author = Some(value.into()),
"author-mail" if is_committed => entry.author_mail = Some(value.into()),
"author-time" if is_committed => {
entry.author_time = Some(value.parse::<i64>()?)
}
"author-tz" if is_committed => entry.author_tz = Some(value.into()),
"committer" if is_committed => entry.committer_name = Some(value.into()),
"committer-mail" if is_committed => entry.committer_email = Some(value.into()),
"committer-time" if is_committed => {
entry.committer_time = Some(value.parse::<i64>()?)
}
"committer-tz" if is_committed => entry.committer_tz = Some(value.into()),
_ => {}
}
}
};
if done {
if let Some(entry) = current_entry.take() {
index.insert(entry.sha, entries.len());
// We only want annotations that have a commit.
if !entry.sha.is_zero() {
entries.push(entry);
}
}
}
}
Ok(entries)
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::BlameEntry;
use super::parse_git_blame;
fn read_test_data(filename: &str) -> String {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push(filename);
std::fs::read_to_string(&path)
.unwrap_or_else(|_| panic!("Could not read test data at {:?}. Is it generated?", path))
}
fn assert_eq_golden(entries: &Vec<BlameEntry>, golden_filename: &str) {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push("golden");
path.push(format!("{}.json", golden_filename));
let mut have_json =
serde_json::to_string_pretty(&entries).expect("could not serialize entries to JSON");
// We always want to save with a trailing newline.
have_json.push('\n');
let update = std::env::var("UPDATE_GOLDEN")
.map(|val| val.eq_ignore_ascii_case("true"))
.unwrap_or(false);
if update {
std::fs::create_dir_all(path.parent().unwrap())
.expect("could not create golden test data directory");
std::fs::write(&path, have_json).expect("could not write out golden data");
} else {
let want_json =
std::fs::read_to_string(&path).unwrap_or_else(|_| {
panic!("could not read golden test data file at {:?}. Did you run the test with UPDATE_GOLDEN=true before?", path);
}).replace("\r\n", "\n");
pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries");
}
}
#[test]
fn test_parse_git_blame_not_committed() {
let output = read_test_data("blame_incremental_not_committed");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_not_committed");
}
#[test]
fn test_parse_git_blame_simple() {
let output = read_test_data("blame_incremental_simple");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_simple");
}
#[test]
fn test_parse_git_blame_complex() {
let output = read_test_data("blame_incremental_complex");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_complex");
}
}

View File

@@ -0,0 +1,374 @@
use crate::commit::get_messages;
use crate::{GitRemote, Oid};
use anyhow::{Context as _, Result, anyhow};
use collections::{HashMap, HashSet};
use futures::AsyncWriteExt;
use gpui::SharedString;
use serde::{Deserialize, Serialize};
use std::process::Stdio;
use std::{ops::Range, path::Path};
use text::Rope;
use time::OffsetDateTime;
use time::UtcOffset;
use time::macros::format_description;
pub use git2 as libgit;
#[derive(Debug, Clone, Default)]
pub struct Blame {
pub entries: Vec<BlameEntry>,
pub messages: HashMap<Oid, String>,
pub remote_url: Option<String>,
}
#[derive(Clone, Debug, Default)]
pub struct ParsedCommitMessage {
pub message: SharedString,
pub permalink: Option<url::Url>,
pub pull_request: Option<crate::hosting_provider::PullRequest>,
pub remote: Option<GitRemote>,
}
impl Blame {
pub async fn for_path(
git_binary: &Path,
working_directory: &Path,
path: &Path,
content: &Rope,
remote_url: Option<String>,
) -> Result<Self> {
let output = run_git_blame(git_binary, working_directory, path, content).await?;
let mut entries = parse_git_blame(&output)?;
entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start));
let mut unique_shas = HashSet::default();
for entry in entries.iter_mut() {
unique_shas.insert(entry.sha);
}
let shas = unique_shas.into_iter().collect::<Vec<_>>();
let messages = get_messages(working_directory, &shas)
.await
.context("failed to get commit messages")?;
Ok(Self {
entries,
messages,
remote_url,
})
}
}
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
async fn run_git_blame(
git_binary: &Path,
working_directory: &Path,
path: &Path,
contents: &Rope,
) -> Result<String> {
let mut child = util::command::new_smol_command(git_binary)
.current_dir(working_directory)
.arg("blame")
.arg("--incremental")
.arg("--contents")
.arg("-")
.arg(path.as_os_str())
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| anyhow!("Failed to start git blame process: {}", e))?;
let stdin = child
.stdin
.as_mut()
.context("failed to get pipe to stdin of git blame command")?;
for chunk in contents.chunks() {
stdin.write_all(chunk.as_bytes()).await?;
}
stdin.flush().await?;
let output = child
.output()
.await
.map_err(|e| anyhow!("Failed to read git blame output: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let trimmed = stderr.trim();
if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) {
return Ok(String::new());
}
return Err(anyhow!("git blame process failed: {}", stderr));
}
Ok(String::from_utf8(output.stdout)?)
}
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
pub sha: Oid,
pub range: Range<u32>,
pub original_line_number: u32,
pub author: Option<String>,
pub author_mail: Option<String>,
pub author_time: Option<i64>,
pub author_tz: Option<String>,
pub committer_name: Option<String>,
pub committer_email: Option<String>,
pub committer_time: Option<i64>,
pub committer_tz: Option<String>,
pub summary: Option<String>,
pub previous: Option<String>,
pub filename: String,
}
impl BlameEntry {
// Returns a BlameEntry by parsing the first line of a `git blame --incremental`
// entry. The line MUST have this format:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
fn new_from_blame_line(line: &str) -> Result<BlameEntry> {
let mut parts = line.split_whitespace();
let sha = parts
.next()
.and_then(|line| line.parse::<Oid>().ok())
.ok_or_else(|| anyhow!("failed to parse sha"))?;
let original_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse original line number"))?;
let final_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let line_count = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let start_line = final_line_number.saturating_sub(1);
let end_line = start_line + line_count;
let range = start_line..end_line;
Ok(Self {
sha,
range,
original_line_number,
..Default::default()
})
}
pub fn author_offset_date_time(&self) -> Result<time::OffsetDateTime> {
if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) {
let format = format_description!("[offset_hour][offset_minute]");
let offset = UtcOffset::parse(author_tz, &format)?;
let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?;
Ok(date_time_utc.to_offset(offset))
} else {
// Directly return current time in UTC if there's no committer time or timezone
Ok(time::OffsetDateTime::now_utc())
}
}
}
// parse_git_blame parses the output of `git blame --incremental`, which returns
// all the blame-entries for a given path incrementally, as it finds them.
//
// Each entry *always* starts with:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
//
// Each entry *always* ends with:
//
// filename <whitespace-quoted-filename-goes-here>
//
// Line numbers are 1-indexed.
//
// A `git blame --incremental` entry looks like this:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
// author Joe Schmoe
// author-mail <joe.schmoe@example.com>
// author-time 1709741400
// author-tz +0100
// committer Joe Schmoe
// committer-mail <joe.schmoe@example.com>
// committer-time 1709741400
// committer-tz +0100
// summary Joe's cool commit
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// If the entry has the same SHA as an entry that was already printed then no
// signature information is printed:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html
fn parse_git_blame(output: &str) -> Result<Vec<BlameEntry>> {
let mut entries: Vec<BlameEntry> = Vec::new();
let mut index: HashMap<Oid, usize> = HashMap::default();
let mut current_entry: Option<BlameEntry> = None;
for line in output.lines() {
let mut done = false;
match &mut current_entry {
None => {
let mut new_entry = BlameEntry::new_from_blame_line(line)?;
if let Some(existing_entry) = index
.get(&new_entry.sha)
.and_then(|slot| entries.get(*slot))
{
new_entry.author.clone_from(&existing_entry.author);
new_entry
.author_mail
.clone_from(&existing_entry.author_mail);
new_entry.author_time = existing_entry.author_time;
new_entry.author_tz.clone_from(&existing_entry.author_tz);
new_entry
.committer_name
.clone_from(&existing_entry.committer_name);
new_entry
.committer_email
.clone_from(&existing_entry.committer_email);
new_entry.committer_time = existing_entry.committer_time;
new_entry
.committer_tz
.clone_from(&existing_entry.committer_tz);
new_entry.summary.clone_from(&existing_entry.summary);
}
current_entry.replace(new_entry);
}
Some(entry) => {
let Some((key, value)) = line.split_once(' ') else {
continue;
};
let is_committed = !entry.sha.is_zero();
match key {
"filename" => {
entry.filename = value.into();
done = true;
}
"previous" => entry.previous = Some(value.into()),
"summary" if is_committed => entry.summary = Some(value.into()),
"author" if is_committed => entry.author = Some(value.into()),
"author-mail" if is_committed => entry.author_mail = Some(value.into()),
"author-time" if is_committed => {
entry.author_time = Some(value.parse::<i64>()?)
}
"author-tz" if is_committed => entry.author_tz = Some(value.into()),
"committer" if is_committed => entry.committer_name = Some(value.into()),
"committer-mail" if is_committed => entry.committer_email = Some(value.into()),
"committer-time" if is_committed => {
entry.committer_time = Some(value.parse::<i64>()?)
}
"committer-tz" if is_committed => entry.committer_tz = Some(value.into()),
_ => {}
}
}
};
if done {
if let Some(entry) = current_entry.take() {
index.insert(entry.sha, entries.len());
// We only want annotations that have a commit.
if !entry.sha.is_zero() {
entries.push(entry);
}
}
}
}
Ok(entries)
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::BlameEntry;
use super::parse_git_blame;
fn read_test_data(filename: &str) -> String {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push(filename);
std::fs::read_to_string(&path)
.unwrap_or_else(|_| panic!("Could not read test data at {:?}. Is it generated?", path))
}
fn assert_eq_golden(entries: &Vec<BlameEntry>, golden_filename: &str) {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push("golden");
path.push(format!("{}.json", golden_filename));
let mut have_json =
serde_json::to_string_pretty(&entries).expect("could not serialize entries to JSON");
// We always want to save with a trailing newline.
have_json.push('\n');
let update = std::env::var("UPDATE_GOLDEN")
.map(|val| val.eq_ignore_ascii_case("true"))
.unwrap_or(false);
if update {
std::fs::create_dir_all(path.parent().unwrap())
.expect("could not create golden test data directory");
std::fs::write(&path, have_json).expect("could not write out golden data");
} else {
let want_json =
std::fs::read_to_string(&path).unwrap_or_else(|_| {
panic!("could not read golden test data file at {:?}. Did you run the test with UPDATE_GOLDEN=true before?", path);
}).replace("\r\n", "\n");
pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries");
}
}
#[test]
fn test_parse_git_blame_not_committed() {
let output = read_test_data("blame_incremental_not_committed");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_not_committed");
}
#[test]
fn test_parse_git_blame_simple() {
let output = read_test_data("blame_incremental_simple");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_simple");
}
#[test]
fn test_parse_git_blame_complex() {
let output = read_test_data("blame_incremental_complex");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_complex");
}
}

View File

@@ -0,0 +1,378 @@
use crate::commit::get_messages;
use crate::{GitRemote, Oid};
use anyhow::{Context as _, Result, anyhow};
use collections::{HashMap, HashSet};
use futures::AsyncWriteExt;
use gpui::SharedString;
use serde::{Deserialize, Serialize};
use std::process::Stdio;
use std::{ops::Range, path::Path};
use text::Rope;
use time::OffsetDateTime;
use time::UtcOffset;
use time::macros::format_description;
pub use git2 as libgit;
#[derive(Debug, Clone, Default)]
pub struct Blame {
pub entries: Vec<BlameEntry>,
pub messages: HashMap<Oid, String>,
pub remote_url: Option<String>,
}
#[derive(Clone, Debug, Default)]
pub struct ParsedCommitMessage {
pub message: SharedString,
pub permalink: Option<url::Url>,
pub pull_request: Option<crate::hosting_provider::PullRequest>,
pub remote: Option<GitRemote>,
}
impl Blame {
pub async fn for_path(
git_binary: &Path,
working_directory: &Path,
path: &Path,
content: &Rope,
remote_url: Option<String>,
) -> Result<Self> {
let output = run_git_blame(git_binary, working_directory, path, content).await?;
let mut entries = parse_git_blame(&output)?;
entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start));
let mut unique_shas = HashSet::default();
for entry in entries.iter_mut() {
unique_shas.insert(entry.sha);
}
let shas = unique_shas.into_iter().collect::<Vec<_>>();
let messages = get_messages(working_directory, &shas)
.await
.context("failed to get commit messages")?;
Ok(Self {
entries,
messages,
remote_url,
})
}
}
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
async fn run_git_blame(
git_binary: &Path,
working_directory: &Path,
path: &Path,
contents: &Rope,
) -> Result<String> {
let mut child = util::command::new_smol_command(git_binary)
.current_dir(working_directory)
.arg("blame")
.arg("--incremental")
.arg("--contents")
.arg("-")
.arg(path.as_os_str())
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| anyhow!("Failed to start git blame process: {}", e))?;
let stdin = child
.stdin
.as_mut()
.context("failed to get pipe to stdin of git blame command")?;
for chunk in contents.chunks() {
stdin.write_all(chunk.as_bytes()).await?;
}
stdin.flush().await?;
let output = child
.output()
.await
.map_err(|e| anyhow!("Failed to read git blame output: {}", e))?;
handle_command_output(output)
}
fn handle_command_output(output: std::process::Output) -> Result<String> {
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let trimmed = stderr.trim();
if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) {
return Ok(String::new());
}
return Err(anyhow!("git blame process failed: {}", stderr));
}
Ok(String::from_utf8(output.stdout)?)
}
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
pub sha: Oid,
pub range: Range<u32>,
pub original_line_number: u32,
pub author: Option<String>,
pub author_mail: Option<String>,
pub author_time: Option<i64>,
pub author_tz: Option<String>,
pub committer_name: Option<String>,
pub committer_email: Option<String>,
pub committer_time: Option<i64>,
pub committer_tz: Option<String>,
pub summary: Option<String>,
pub previous: Option<String>,
pub filename: String,
}
impl BlameEntry {
// Returns a BlameEntry by parsing the first line of a `git blame --incremental`
// entry. The line MUST have this format:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
fn new_from_blame_line(line: &str) -> Result<BlameEntry> {
let mut parts = line.split_whitespace();
let sha = parts
.next()
.and_then(|line| line.parse::<Oid>().ok())
.ok_or_else(|| anyhow!("failed to parse sha"))?;
let original_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse original line number"))?;
let final_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let line_count = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let start_line = final_line_number.saturating_sub(1);
let end_line = start_line + line_count;
let range = start_line..end_line;
Ok(Self {
sha,
range,
original_line_number,
..Default::default()
})
}
pub fn author_offset_date_time(&self) -> Result<time::OffsetDateTime> {
if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) {
let format = format_description!("[offset_hour][offset_minute]");
let offset = UtcOffset::parse(author_tz, &format)?;
let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?;
Ok(date_time_utc.to_offset(offset))
} else {
// Directly return current time in UTC if there's no committer time or timezone
Ok(time::OffsetDateTime::now_utc())
}
}
}
// parse_git_blame parses the output of `git blame --incremental`, which returns
// all the blame-entries for a given path incrementally, as it finds them.
//
// Each entry *always* starts with:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
//
// Each entry *always* ends with:
//
// filename <whitespace-quoted-filename-goes-here>
//
// Line numbers are 1-indexed.
//
// A `git blame --incremental` entry looks like this:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
// author Joe Schmoe
// author-mail <joe.schmoe@example.com>
// author-time 1709741400
// author-tz +0100
// committer Joe Schmoe
// committer-mail <joe.schmoe@example.com>
// committer-time 1709741400
// committer-tz +0100
// summary Joe's cool commit
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// If the entry has the same SHA as an entry that was already printed then no
// signature information is printed:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html
fn parse_git_blame(output: &str) -> Result<Vec<BlameEntry>> {
let mut entries: Vec<BlameEntry> = Vec::new();
let mut index: HashMap<Oid, usize> = HashMap::default();
let mut current_entry: Option<BlameEntry> = None;
for line in output.lines() {
let mut done = false;
match &mut current_entry {
None => {
let mut new_entry = BlameEntry::new_from_blame_line(line)?;
if let Some(existing_entry) = index
.get(&new_entry.sha)
.and_then(|slot| entries.get(*slot))
{
new_entry.author.clone_from(&existing_entry.author);
new_entry
.author_mail
.clone_from(&existing_entry.author_mail);
new_entry.author_time = existing_entry.author_time;
new_entry.author_tz.clone_from(&existing_entry.author_tz);
new_entry
.committer_name
.clone_from(&existing_entry.committer_name);
new_entry
.committer_email
.clone_from(&existing_entry.committer_email);
new_entry.committer_time = existing_entry.committer_time;
new_entry
.committer_tz
.clone_from(&existing_entry.committer_tz);
new_entry.summary.clone_from(&existing_entry.summary);
}
current_entry.replace(new_entry);
}
Some(entry) => {
let Some((key, value)) = line.split_once(' ') else {
continue;
};
let is_committed = !entry.sha.is_zero();
match key {
"filename" => {
entry.filename = value.into();
done = true;
}
"previous" => entry.previous = Some(value.into()),
"summary" if is_committed => entry.summary = Some(value.into()),
"author" if is_committed => entry.author = Some(value.into()),
"author-mail" if is_committed => entry.author_mail = Some(value.into()),
"author-time" if is_committed => {
entry.author_time = Some(value.parse::<i64>()?)
}
"author-tz" if is_committed => entry.author_tz = Some(value.into()),
"committer" if is_committed => entry.committer_name = Some(value.into()),
"committer-mail" if is_committed => entry.committer_email = Some(value.into()),
"committer-time" if is_committed => {
entry.committer_time = Some(value.parse::<i64>()?)
}
"committer-tz" if is_committed => entry.committer_tz = Some(value.into()),
_ => {}
}
}
};
if done {
if let Some(entry) = current_entry.take() {
index.insert(entry.sha, entries.len());
// We only want annotations that have a commit.
if !entry.sha.is_zero() {
entries.push(entry);
}
}
}
}
Ok(entries)
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::BlameEntry;
use super::parse_git_blame;
fn read_test_data(filename: &str) -> String {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push(filename);
std::fs::read_to_string(&path)
.unwrap_or_else(|_| panic!("Could not read test data at {:?}. Is it generated?", path))
}
fn assert_eq_golden(entries: &Vec<BlameEntry>, golden_filename: &str) {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push("golden");
path.push(format!("{}.json", golden_filename));
let mut have_json =
serde_json::to_string_pretty(&entries).expect("could not serialize entries to JSON");
// We always want to save with a trailing newline.
have_json.push('\n');
let update = std::env::var("UPDATE_GOLDEN")
.map(|val| val.eq_ignore_ascii_case("true"))
.unwrap_or(false);
if update {
std::fs::create_dir_all(path.parent().unwrap())
.expect("could not create golden test data directory");
std::fs::write(&path, have_json).expect("could not write out golden data");
} else {
let want_json =
std::fs::read_to_string(&path).unwrap_or_else(|_| {
panic!("could not read golden test data file at {:?}. Did you run the test with UPDATE_GOLDEN=true before?", path);
}).replace("\r\n", "\n");
pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries");
}
}
#[test]
fn test_parse_git_blame_not_committed() {
let output = read_test_data("blame_incremental_not_committed");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_not_committed");
}
#[test]
fn test_parse_git_blame_simple() {
let output = read_test_data("blame_incremental_simple");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_simple");
}
#[test]
fn test_parse_git_blame_complex() {
let output = read_test_data("blame_incremental_complex");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_complex");
}
}
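To make the incremental format concrete, here is a minimal sketch of feeding `parse_git_blame` a two-entry stream. It assumes crate-internal access, since the function is private, and mirrors the shape documented in the comment above:

```rust
// A minimal sketch, assuming crate-internal access to the private
// `parse_git_blame`. Two entries share a SHA, so the second inherits the
// first one's signature fields instead of repeating them.
let output = "\
6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
author Joe Schmoe
author-mail <joe.schmoe@example.com>
author-time 1709741400
author-tz +0100
summary Joe's cool commit
filename index.js
6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
filename index.js
";

let entries = parse_git_blame(output).unwrap();
assert_eq!(entries.len(), 2);
// Result lines are 1-indexed in the output but stored as 0-indexed,
// half-open row ranges: result line 2 with length 1 becomes 1..2.
assert_eq!(entries[0].range, 1..2);
assert_eq!(entries[1].range, 3..4);
// The second entry inherited the signature of the first.
assert_eq!(entries[1].author.as_deref(), Some("Joe Schmoe"));
```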

View File

@@ -0,0 +1,374 @@
use crate::commit::get_messages;
use crate::{GitRemote, Oid};
use anyhow::{Context as _, Result, anyhow};
use collections::{HashMap, HashSet};
use futures::AsyncWriteExt;
use gpui::SharedString;
use serde::{Deserialize, Serialize};
use std::process::Stdio;
use std::{ops::Range, path::Path};
use text::Rope;
use time::OffsetDateTime;
use time::UtcOffset;
use time::macros::format_description;
pub use git2 as libgit;
#[derive(Debug, Clone, Default)]
pub struct Blame {
pub entries: Vec<BlameEntry>,
pub messages: HashMap<Oid, String>,
pub remote_url: Option<String>,
}
#[derive(Clone, Debug, Default)]
pub struct ParsedCommitMessage {
pub message: SharedString,
pub permalink: Option<url::Url>,
pub pull_request: Option<crate::hosting_provider::PullRequest>,
pub remote: Option<GitRemote>,
}
impl Blame {
pub async fn for_path(
git_binary: &Path,
working_directory: &Path,
path: &Path,
content: &Rope,
remote_url: Option<String>,
) -> Result<Self> {
let output = run_git_blame(git_binary, working_directory, path, content).await?;
let mut entries = parse_git_blame(&output)?;
entries.sort_unstable_by(|a, b| a.range.start.cmp(&b.range.start));
let mut unique_shas = HashSet::default();
for entry in entries.iter_mut() {
unique_shas.insert(entry.sha);
}
let shas = unique_shas.into_iter().collect::<Vec<_>>();
let messages = get_messages(working_directory, &shas)
.await
.context("failed to get commit messages")?;
Ok(Self {
entries,
messages,
remote_url,
})
}
}
const GIT_BLAME_NO_COMMIT_ERROR: &str = "fatal: no such ref: HEAD";
const GIT_BLAME_NO_PATH: &str = "fatal: no such path";
async fn run_git_blame(
git_binary: &Path,
working_directory: &Path,
path: &Path,
contents: &Rope,
) -> Result<String> {
let mut child = util::command::new_smol_command(git_binary)
.current_dir(working_directory)
.arg("blame")
.arg("--incremental")
.arg("--contents")
.arg("-")
.arg(path.as_os_str())
.stdin(Stdio::piped())
.stdout(Stdio::piped())
.stderr(Stdio::piped())
.spawn()
.map_err(|e| anyhow!("Failed to start git blame process: {}", e))?;
let stdin = child
.stdin
.as_mut()
.context("failed to get pipe to stdin of git blame command")?;
for chunk in contents.chunks() {
stdin.write_all(chunk.as_bytes()).await?;
}
stdin.flush().await?;
let output = child
.output()
.await
.map_err(|e| anyhow!("Failed to read git blame output: {}", e))?;
if !output.status.success() {
let stderr = String::from_utf8_lossy(&output.stderr);
let trimmed = stderr.trim();
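    // A repository with no commits yet ("no such ref: HEAD") or an
    // untracked path simply has no blame information; treat both as
    // empty output rather than as errors.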
if trimmed == GIT_BLAME_NO_COMMIT_ERROR || trimmed.contains(GIT_BLAME_NO_PATH) {
return Ok(String::new());
}
return Err(anyhow!("git blame process failed: {}", stderr));
}
Ok(String::from_utf8(output.stdout)?)
}
#[derive(Serialize, Deserialize, Default, Debug, Clone, PartialEq, Eq)]
pub struct BlameEntry {
pub sha: Oid,
pub range: Range<u32>,
pub original_line_number: u32,
pub author: Option<String>,
pub author_mail: Option<String>,
pub author_time: Option<i64>,
pub author_tz: Option<String>,
pub committer_name: Option<String>,
pub committer_email: Option<String>,
pub committer_time: Option<i64>,
pub committer_tz: Option<String>,
pub summary: Option<String>,
pub previous: Option<String>,
pub filename: String,
}
impl BlameEntry {
// Returns a BlameEntry by parsing the first line of a `git blame --incremental`
// entry. The line MUST have this format:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
fn new_from_blame_line(line: &str) -> Result<BlameEntry> {
let mut parts = line.split_whitespace();
let sha = parts
.next()
.and_then(|line| line.parse::<Oid>().ok())
.ok_or_else(|| anyhow!("failed to parse sha"))?;
let original_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse original line number"))?;
let final_line_number = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let line_count = parts
.next()
.and_then(|line| line.parse::<u32>().ok())
.ok_or_else(|| anyhow!("Failed to parse final line number"))?;
let start_line = final_line_number.saturating_sub(1);
let end_line = start_line + line_count;
let range = start_line..end_line;
Ok(Self {
sha,
range,
original_line_number,
..Default::default()
})
}
pub fn author_offset_date_time(&self) -> Result<time::OffsetDateTime> {
if let (Some(author_time), Some(author_tz)) = (self.author_time, &self.author_tz) {
let format = format_description!("[offset_hour][offset_minute]");
let offset = UtcOffset::parse(author_tz, &format)?;
let date_time_utc = OffsetDateTime::from_unix_timestamp(author_time)?;
Ok(date_time_utc.to_offset(offset))
} else {
            // Fall back to the current time in UTC if there's no author time or timezone
Ok(time::OffsetDateTime::now_utc())
}
}
}
// parse_git_blame parses the output of `git blame --incremental`, which returns
// all the blame-entries for a given path incrementally, as it finds them.
//
// Each entry *always* starts with:
//
// <40-byte-hex-sha1> <sourceline> <resultline> <num-lines>
//
// Each entry *always* ends with:
//
// filename <whitespace-quoted-filename-goes-here>
//
// Line numbers are 1-indexed.
//
// A `git blame --incremental` entry looks like this:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 2 2 1
// author Joe Schmoe
// author-mail <joe.schmoe@example.com>
// author-time 1709741400
// author-tz +0100
// committer Joe Schmoe
// committer-mail <joe.schmoe@example.com>
// committer-time 1709741400
// committer-tz +0100
// summary Joe's cool commit
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// If the entry has the same SHA as an entry that was already printed then no
// signature information is printed:
//
// 6ad46b5257ba16d12c5ca9f0d4900320959df7f4 3 4 1
// previous 486c2409237a2c627230589e567024a96751d475 index.js
// filename index.js
//
// More about `--incremental` output: https://mirrors.edge.kernel.org/pub/software/scm/git/docs/git-blame.html
fn parse_git_blame(output: &str) -> Result<Vec<BlameEntry>> {
let mut entries: Vec<BlameEntry> = Vec::new();
let mut index: HashMap<Oid, usize> = HashMap::default();
let mut current_entry: Option<BlameEntry> = None;
for line in output.lines() {
let mut done = false;
match &mut current_entry {
None => {
let mut new_entry = BlameEntry::new_from_blame_line(line)?;
if let Some(existing_entry) = index
.get(&new_entry.sha)
.and_then(|slot| entries.get(*slot))
{
new_entry.author.clone_from(&existing_entry.author);
new_entry
.author_mail
.clone_from(&existing_entry.author_mail);
new_entry.author_time = existing_entry.author_time;
new_entry.author_tz.clone_from(&existing_entry.author_tz);
new_entry
.committer_name
.clone_from(&existing_entry.committer_name);
new_entry
.committer_email
.clone_from(&existing_entry.committer_email);
new_entry.committer_time = existing_entry.committer_time;
new_entry
.committer_tz
.clone_from(&existing_entry.committer_tz);
new_entry.summary.clone_from(&existing_entry.summary);
}
current_entry.replace(new_entry);
}
Some(entry) => {
let Some((key, value)) = line.split_once(' ') else {
continue;
};
let is_committed = !entry.sha.is_zero();
match key {
"filename" => {
entry.filename = value.into();
done = true;
}
"previous" => entry.previous = Some(value.into()),
"summary" if is_committed => entry.summary = Some(value.into()),
"author" if is_committed => entry.author = Some(value.into()),
"author-mail" if is_committed => entry.author_mail = Some(value.into()),
"author-time" if is_committed => {
entry.author_time = Some(value.parse::<i64>()?)
}
"author-tz" if is_committed => entry.author_tz = Some(value.into()),
"committer" if is_committed => entry.committer_name = Some(value.into()),
"committer-mail" if is_committed => entry.committer_email = Some(value.into()),
"committer-time" if is_committed => {
entry.committer_time = Some(value.parse::<i64>()?)
}
"committer-tz" if is_committed => entry.committer_tz = Some(value.into()),
_ => {}
}
}
};
if done {
if let Some(entry) = current_entry.take() {
index.insert(entry.sha, entries.len());
// We only want annotations that have a commit.
if !entry.sha.is_zero() {
entries.push(entry);
}
}
}
}
Ok(entries)
}
#[cfg(test)]
mod tests {
use std::path::PathBuf;
use super::BlameEntry;
use super::parse_git_blame;
fn read_test_data(filename: &str) -> String {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push(filename);
std::fs::read_to_string(&path)
.unwrap_or_else(|_| panic!("Could not read test data at {:?}. Is it generated?", path))
}
fn assert_eq_golden(entries: &Vec<BlameEntry>, golden_filename: &str) {
let mut path = PathBuf::from(env!("CARGO_MANIFEST_DIR"));
path.push("test_data");
path.push("golden");
path.push(format!("{}.json", golden_filename));
let mut have_json =
serde_json::to_string_pretty(&entries).expect("could not serialize entries to JSON");
// We always want to save with a trailing newline.
have_json.push('\n');
let update = std::env::var("UPDATE_GOLDEN")
.map(|val| val.eq_ignore_ascii_case("true"))
.unwrap_or(false);
if update {
std::fs::create_dir_all(path.parent().unwrap())
.expect("could not create golden test data directory");
std::fs::write(&path, have_json).expect("could not write out golden data");
} else {
let want_json =
std::fs::read_to_string(&path).unwrap_or_else(|_| {
panic!("could not read golden test data file at {:?}. Did you run the test with UPDATE_GOLDEN=true before?", path);
}).replace("\r\n", "\n");
pretty_assertions::assert_eq!(have_json, want_json, "wrong blame entries");
}
}
#[test]
fn test_parse_git_blame_not_committed() {
let output = read_test_data("blame_incremental_not_committed");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_not_committed");
}
#[test]
fn test_parse_git_blame_simple() {
let output = read_test_data("blame_incremental_simple");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_simple");
}
#[test]
fn test_parse_git_blame_complex() {
let output = read_test_data("blame_incremental_complex");
let entries = parse_git_blame(&output).unwrap();
assert_eq_golden(&entries, "blame_incremental_complex");
}
}
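As a quick illustration of the timestamp handling in `author_offset_date_time` above, a minimal sketch of the same `time` crate calls, assuming the crate features this file already relies on:

```rust
// A minimal sketch of the timezone math in `author_offset_date_time`,
// using the same `time` crate APIs as the file above.
use time::{OffsetDateTime, UtcOffset, macros::format_description};

let format = format_description!("[offset_hour][offset_minute]");
// Git prints offsets like "+0100": signed hours and minutes, no colon.
let offset = UtcOffset::parse("+0100", &format).unwrap();
let utc = OffsetDateTime::from_unix_timestamp(1_709_741_400).unwrap();
// The wall-clock reading shifts by one hour; the instant is unchanged.
let local = utc.to_offset(offset);
assert_eq!(local.unix_timestamp(), utc.unix_timestamp());
```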

View File

@@ -1,872 +0,0 @@
use language::{BufferSnapshot, Diff, Point, ToOffset};
use project::search::SearchQuery;
use std::iter;
use util::{ResultExt as _, paths::PathMatcher};
/// Performs an exact string replacement in a buffer, requiring precise character-for-character matching.
/// Uses the search functionality to locate the first occurrence of the exact string.
/// Returns None if no exact match is found in the buffer.
pub async fn replace_exact(old: &str, new: &str, snapshot: &BufferSnapshot) -> Option<Diff> {
let query = SearchQuery::text(
old,
false,
true,
true,
PathMatcher::new(iter::empty::<&str>()).ok()?,
PathMatcher::new(iter::empty::<&str>()).ok()?,
false,
None,
)
.log_err()?;
let matches = query.search(&snapshot, None).await;
if matches.is_empty() {
return None;
}
let edit_range = matches[0].clone();
let diff = language::text_diff(&old, &new);
let edits = diff
.into_iter()
.map(|(old_range, text)| {
let start = edit_range.start + old_range.start;
let end = edit_range.start + old_range.end;
(start..end, text)
})
.collect::<Vec<_>>();
let diff = language::Diff {
base_version: snapshot.version().clone(),
line_ending: snapshot.line_ending(),
edits,
};
Some(diff)
}
/// Performs a replacement that's indentation-aware - matches text content ignoring leading whitespace differences.
/// When replacing, preserves the indentation level found in the buffer at each matching line.
/// Returns None if no match found or if indentation is offset inconsistently across matched lines.
pub fn replace_with_flexible_indent(old: &str, new: &str, buffer: &BufferSnapshot) -> Option<Diff> {
let (old_lines, old_min_indent) = lines_with_min_indent(old);
let (new_lines, new_min_indent) = lines_with_min_indent(new);
let min_indent = old_min_indent.min(new_min_indent);
let old_lines = drop_lines_prefix(&old_lines, min_indent);
let new_lines = drop_lines_prefix(&new_lines, min_indent);
let max_row = buffer.max_point().row;
'windows: for start_row in 0..max_row + 1 {
let end_row = start_row + old_lines.len().saturating_sub(1) as u32;
if end_row > max_row {
// The buffer ends before fully matching the pattern
return None;
}
let start_point = Point::new(start_row, 0);
let end_point = Point::new(end_row, buffer.line_len(end_row));
let range = start_point.to_offset(buffer)..end_point.to_offset(buffer);
let window_text = buffer.text_for_range(range.clone());
let mut window_lines = window_text.lines();
let mut old_lines_iter = old_lines.iter();
let mut common_mismatch = None;
#[derive(Eq, PartialEq)]
enum Mismatch {
OverIndented(String),
UnderIndented(String),
}
while let (Some(window_line), Some(old_line)) = (window_lines.next(), old_lines_iter.next())
{
let line_trimmed = window_line.trim_start();
if line_trimmed != old_line.trim_start() {
continue 'windows;
}
if line_trimmed.is_empty() {
continue;
}
let line_mismatch = if window_line.len() > old_line.len() {
let prefix = window_line[..window_line.len() - old_line.len()].to_string();
Mismatch::UnderIndented(prefix)
} else {
let prefix = old_line[..old_line.len() - window_line.len()].to_string();
Mismatch::OverIndented(prefix)
};
match &common_mismatch {
Some(common_mismatch) if common_mismatch != &line_mismatch => {
continue 'windows;
}
Some(_) => (),
None => common_mismatch = Some(line_mismatch),
}
}
if let Some(common_mismatch) = &common_mismatch {
let line_ending = buffer.line_ending();
let replacement = new_lines
.iter()
.map(|new_line| {
if new_line.trim().is_empty() {
new_line.to_string()
} else {
match common_mismatch {
Mismatch::UnderIndented(prefix) => prefix.to_string() + new_line,
Mismatch::OverIndented(prefix) => new_line
.strip_prefix(prefix)
.unwrap_or(new_line)
.to_string(),
}
}
})
.collect::<Vec<_>>()
.join(line_ending.as_str());
let diff = Diff {
base_version: buffer.version().clone(),
line_ending,
edits: vec![(range, replacement.into())],
};
return Some(diff);
}
}
None
}
fn drop_lines_prefix<'a>(lines: &'a [&str], prefix_len: usize) -> Vec<&'a str> {
lines
.iter()
.map(|line| line.get(prefix_len..).unwrap_or(""))
.collect()
}
fn lines_with_min_indent(input: &str) -> (Vec<&str>, usize) {
let mut lines = Vec::new();
let mut min_indent: Option<usize> = None;
for line in input.lines() {
lines.push(line);
if !line.trim().is_empty() {
let indent = line.len() - line.trim_start().len();
min_indent = Some(min_indent.map_or(indent, |m| m.min(indent)));
}
}
(lines, min_indent.unwrap_or(0))
}
#[cfg(test)]
mod replace_exact_tests {
use super::*;
use gpui::TestAppContext;
use gpui::prelude::*;
#[gpui::test]
async fn basic(cx: &mut TestAppContext) {
let result = test_replace_exact(cx, "let x = 41;", "let x = 41;", "let x = 42;").await;
assert_eq!(result, Some("let x = 42;".to_string()));
}
#[gpui::test]
async fn no_match(cx: &mut TestAppContext) {
let result = test_replace_exact(cx, "let x = 41;", "let y = 42;", "let y = 43;").await;
assert_eq!(result, None);
}
#[gpui::test]
async fn multi_line(cx: &mut TestAppContext) {
let whole = "fn example() {\n let x = 41;\n println!(\"x = {}\", x);\n}";
let old_text = " let x = 41;\n println!(\"x = {}\", x);";
let new_text = " let x = 42;\n println!(\"x = {}\", x);";
let result = test_replace_exact(cx, whole, old_text, new_text).await;
assert_eq!(
result,
Some("fn example() {\n let x = 42;\n println!(\"x = {}\", x);\n}".to_string())
);
}
#[gpui::test]
async fn multiple_occurrences(cx: &mut TestAppContext) {
let whole = "let x = 41;\nlet y = 41;\nlet z = 41;";
let result = test_replace_exact(cx, whole, "let x = 41;", "let x = 42;").await;
assert_eq!(
result,
Some("let x = 42;\nlet y = 41;\nlet z = 41;".to_string())
);
}
#[gpui::test]
async fn empty_buffer(cx: &mut TestAppContext) {
let result = test_replace_exact(cx, "", "let x = 41;", "let x = 42;").await;
assert_eq!(result, None);
}
#[gpui::test]
async fn partial_match(cx: &mut TestAppContext) {
let whole = "let x = 41; let y = 42;";
let result = test_replace_exact(cx, whole, "let x = 41", "let x = 42").await;
assert_eq!(result, Some("let x = 42; let y = 42;".to_string()));
}
#[gpui::test]
async fn whitespace_sensitive(cx: &mut TestAppContext) {
let result = test_replace_exact(cx, "let x = 41;", " let x = 41;", "let x = 42;").await;
assert_eq!(result, None);
}
#[gpui::test]
async fn entire_buffer(cx: &mut TestAppContext) {
let result = test_replace_exact(cx, "let x = 41;", "let x = 41;", "let x = 42;").await;
assert_eq!(result, Some("let x = 42;".to_string()));
}
async fn test_replace_exact(
cx: &mut TestAppContext,
whole: &str,
old: &str,
new: &str,
) -> Option<String> {
let buffer = cx.new(|cx| language::Buffer::local(whole, cx));
let buffer_snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact(old, new, &buffer_snapshot).await;
diff.map(|diff| {
buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
})
})
}
}
#[cfg(test)]
mod flexible_indent_tests {
use super::*;
use gpui::TestAppContext;
use gpui::prelude::*;
use unindent::Unindent;
#[gpui::test]
fn test_underindented_single_line(cx: &mut TestAppContext) {
let cur = " let a = 41;".to_string();
let old = " let a = 41;".to_string();
let new = " let a = 42;".to_string();
let exp = " let a = 42;".to_string();
let result = test_replace_with_flexible_indent(cx, &cur, &old, &new);
assert_eq!(result, Some(exp.to_string()))
}
#[gpui::test]
fn test_overindented_single_line(cx: &mut TestAppContext) {
let cur = " let a = 41;".to_string();
let old = " let a = 41;".to_string();
let new = " let a = 42;".to_string();
let exp = " let a = 42;".to_string();
let result = test_replace_with_flexible_indent(cx, &cur, &old, &new);
assert_eq!(result, Some(exp.to_string()))
}
#[gpui::test]
fn test_underindented_multi_line(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
let x = 5;
println!("x = {}", x);
let y = 10;
}
"#
.unindent();
let old = r#"
let x = 5;
println!("x = {}", x);
"#
.unindent();
let new = r#"
let x = 42;
println!("New value: {}", x);
"#
.unindent();
let expected = r#"
fn test() {
let x = 42;
println!("New value: {}", x);
let y = 10;
}
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
Some(expected.to_string())
);
}
#[gpui::test]
fn test_overindented_multi_line(cx: &mut TestAppContext) {
let cur = r#"
fn foo() {
let a = 41;
let b = 3.13;
}
"#
.unindent();
// 6 space indent instead of 4
let old = " let a = 41;\n let b = 3.13;";
let new = " let a = 42;\n let b = 3.14;";
let expected = r#"
fn foo() {
let a = 42;
let b = 3.14;
}
"#
.unindent();
let result = test_replace_with_flexible_indent(cx, &cur, &old, &new);
assert_eq!(result, Some(expected.to_string()))
}
#[gpui::test]
fn test_replace_inconsistent_indentation(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
if condition {
println!("{}", 43);
}
}
"#
.unindent();
let old = r#"
if condition {
println!("{}", 43);
"#
.unindent();
let new = r#"
if condition {
println!("{}", 42);
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[gpui::test]
fn test_replace_with_empty_lines(cx: &mut TestAppContext) {
// Test with empty lines
let whole = r#"
fn test() {
let x = 5;
println!("x = {}", x);
}
"#
.unindent();
let old = r#"
let x = 5;
println!("x = {}", x);
"#
.unindent();
let new = r#"
let x = 10;
println!("New x: {}", x);
"#
.unindent();
let expected = r#"
fn test() {
let x = 10;
println!("New x: {}", x);
}
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
Some(expected.to_string())
);
}
#[gpui::test]
fn test_replace_no_match(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
let x = 5;
}
"#
.unindent();
let old = r#"
let y = 10;
"#
.unindent();
let new = r#"
let y = 20;
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[gpui::test]
fn test_replace_whole_ends_before_matching_old(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
let x = 5;
"#
.unindent();
let old = r#"
let x = 5;
println!("x = {}", x);
"#
.unindent();
let new = r#"
let x = 10;
println!("x = {}", x);
"#
.unindent();
// Should return None because whole doesn't fully contain the old text
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[gpui::test]
fn test_replace_whole_is_shorter_than_old(cx: &mut TestAppContext) {
let whole = r#"
let x = 5;
"#
.unindent();
let old = r#"
let x = 5;
let y = 10;
"#
.unindent();
let new = r#"
let x = 5;
let y = 20;
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[gpui::test]
fn test_replace_old_is_empty(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
let x = 5;
}
"#
.unindent();
let old = "";
let new = r#"
let y = 10;
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[gpui::test]
fn test_replace_whole_is_empty(cx: &mut TestAppContext) {
let whole = "";
let old = r#"
let x = 5;
"#
.unindent();
let new = r#"
let x = 10;
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
None
);
}
#[test]
fn test_lines_with_min_indent() {
// Empty string
assert_eq!(lines_with_min_indent(""), (vec![], 0));
// Single line without indentation
assert_eq!(lines_with_min_indent("hello"), (vec!["hello"], 0));
// Multiple lines with no indentation
assert_eq!(
lines_with_min_indent("line1\nline2\nline3"),
(vec!["line1", "line2", "line3"], 0)
);
// Multiple lines with consistent indentation
assert_eq!(
lines_with_min_indent(" line1\n line2\n line3"),
(vec![" line1", " line2", " line3"], 2)
);
// Multiple lines with varying indentation
assert_eq!(
lines_with_min_indent(" line1\n line2\n line3"),
(vec![" line1", " line2", " line3"], 2)
);
// Lines with mixed indentation and empty lines
assert_eq!(
lines_with_min_indent(" line1\n\n line2"),
(vec![" line1", "", " line2"], 2)
);
}
#[gpui::test]
fn test_replace_with_missing_indent_uneven_match(cx: &mut TestAppContext) {
let whole = r#"
fn test() {
if true {
let x = 5;
println!("x = {}", x);
}
}
"#
.unindent();
let old = r#"
let x = 5;
println!("x = {}", x);
"#
.unindent();
let new = r#"
let x = 42;
println!("x = {}", x);
"#
.unindent();
let expected = r#"
fn test() {
if true {
let x = 42;
println!("x = {}", x);
}
}
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
Some(expected.to_string())
);
}
#[gpui::test]
fn test_replace_big_example(cx: &mut TestAppContext) {
let whole = r#"
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_valid_age() {
assert!(is_valid_age(0));
assert!(!is_valid_age(151));
}
}
"#
.unindent();
let old = r#"
#[test]
fn test_is_valid_age() {
assert!(is_valid_age(0));
assert!(!is_valid_age(151));
}
"#
.unindent();
let new = r#"
#[test]
fn test_is_valid_age() {
assert!(is_valid_age(0));
assert!(!is_valid_age(151));
}
#[test]
fn test_group_people_by_age() {
let people = vec![
Person::new("Young One", 5, "young@example.com").unwrap(),
Person::new("Teen One", 15, "teen@example.com").unwrap(),
Person::new("Teen Two", 18, "teen2@example.com").unwrap(),
Person::new("Adult One", 25, "adult@example.com").unwrap(),
];
let groups = group_people_by_age(&people);
assert_eq!(groups.get(&0).unwrap().len(), 1); // One person in 0-9
assert_eq!(groups.get(&10).unwrap().len(), 2); // Two people in 10-19
assert_eq!(groups.get(&20).unwrap().len(), 1); // One person in 20-29
}
"#
.unindent();
let expected = r#"
#[cfg(test)]
mod tests {
use super::*;
#[test]
fn test_is_valid_age() {
assert!(is_valid_age(0));
assert!(!is_valid_age(151));
}
#[test]
fn test_group_people_by_age() {
let people = vec![
Person::new("Young One", 5, "young@example.com").unwrap(),
Person::new("Teen One", 15, "teen@example.com").unwrap(),
Person::new("Teen Two", 18, "teen2@example.com").unwrap(),
Person::new("Adult One", 25, "adult@example.com").unwrap(),
];
let groups = group_people_by_age(&people);
assert_eq!(groups.get(&0).unwrap().len(), 1); // One person in 0-9
assert_eq!(groups.get(&10).unwrap().len(), 2); // Two people in 10-19
assert_eq!(groups.get(&20).unwrap().len(), 1); // One person in 20-29
}
}
"#
.unindent();
assert_eq!(
test_replace_with_flexible_indent(cx, &whole, &old, &new),
Some(expected.to_string())
);
}
#[test]
fn test_drop_lines_prefix() {
// Empty array
assert_eq!(drop_lines_prefix(&[], 2), Vec::<&str>::new());
// Zero prefix length
assert_eq!(
drop_lines_prefix(&["line1", "line2"], 0),
vec!["line1", "line2"]
);
// Normal prefix drop
assert_eq!(
drop_lines_prefix(&[" line1", " line2"], 2),
vec!["line1", "line2"]
);
// Prefix longer than some lines
assert_eq!(drop_lines_prefix(&[" line1", "a"], 2), vec!["line1", ""]);
// Prefix longer than all lines
assert_eq!(drop_lines_prefix(&["a", "b"], 5), vec!["", ""]);
// Mixed length lines
assert_eq!(
drop_lines_prefix(&[" line1", " line2", " line3"], 2),
vec![" line1", "line2", " line3"]
);
}
#[gpui::test]
async fn test_replace_exact_basic(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("let x = 41;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact("let x = 41;", "let x = 42;", &snapshot).await;
assert!(diff.is_some());
let diff = diff.unwrap();
assert_eq!(diff.edits.len(), 1);
let result = buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
});
assert_eq!(result, "let x = 42;");
}
#[gpui::test]
async fn test_replace_exact_no_match(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("let x = 41;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact("let y = 42;", "let y = 43;", &snapshot).await;
assert!(diff.is_none());
}
#[gpui::test]
async fn test_replace_exact_multi_line(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| {
language::Buffer::local(
"fn example() {\n let x = 41;\n println!(\"x = {}\", x);\n}",
cx,
)
});
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let old_text = " let x = 41;\n println!(\"x = {}\", x);";
let new_text = " let x = 42;\n println!(\"x = {}\", x);";
let diff = replace_exact(old_text, new_text, &snapshot).await;
assert!(diff.is_some());
let diff = diff.unwrap();
let result = buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
});
assert_eq!(
result,
"fn example() {\n let x = 42;\n println!(\"x = {}\", x);\n}"
);
}
#[gpui::test]
async fn test_replace_exact_multiple_occurrences(cx: &mut TestAppContext) {
let buffer =
cx.new(|cx| language::Buffer::local("let x = 41;\nlet y = 41;\nlet z = 41;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
// Should replace only the first occurrence
let diff = replace_exact("let x = 41;", "let x = 42;", &snapshot).await;
assert!(diff.is_some());
let diff = diff.unwrap();
let result = buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
});
assert_eq!(result, "let x = 42;\nlet y = 41;\nlet z = 41;");
}
#[gpui::test]
async fn test_replace_exact_empty_buffer(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact("let x = 41;", "let x = 42;", &snapshot).await;
assert!(diff.is_none());
}
#[gpui::test]
async fn test_replace_exact_partial_match(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("let x = 41; let y = 42;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
// Verify substring replacement actually works
let diff = replace_exact("let x = 41", "let x = 42", &snapshot).await;
assert!(diff.is_some());
let diff = diff.unwrap();
let result = buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
});
assert_eq!(result, "let x = 42; let y = 42;");
}
#[gpui::test]
async fn test_replace_exact_whitespace_sensitive(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("let x = 41;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact(" let x = 41;", "let x = 42;", &snapshot).await;
assert!(diff.is_none());
}
#[gpui::test]
async fn test_replace_exact_entire_buffer(cx: &mut TestAppContext) {
let buffer = cx.new(|cx| language::Buffer::local("let x = 41;", cx));
let snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
let diff = replace_exact("let x = 41;", "let x = 42;", &snapshot).await;
assert!(diff.is_some());
let diff = diff.unwrap();
let result = buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
});
assert_eq!(result, "let x = 42;");
}
fn test_replace_with_flexible_indent(
cx: &mut TestAppContext,
whole: &str,
old: &str,
new: &str,
) -> Option<String> {
// Create a local buffer with the test content
let buffer = cx.new(|cx| language::Buffer::local(whole, cx));
// Get the buffer snapshot
let buffer_snapshot = buffer.read_with(cx, |buffer, _cx| buffer.snapshot());
// Call replace_flexible and transform the result
replace_with_flexible_indent(old, new, &buffer_snapshot).map(|diff| {
buffer.update(cx, |buffer, cx| {
let _ = buffer.apply_diff(diff, cx);
buffer.text()
})
})
}
}
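To make the indentation normalization concrete, a minimal sketch of the two private helpers above, showing how a pattern is reduced to its relative indentation before window matching:

```rust
// A minimal sketch of the normalization step in
// `replace_with_flexible_indent`, reusing this file's private helpers.
let old = "        let a = 1;\n            let b = 2;";
let (lines, min_indent) = lines_with_min_indent(old);
assert_eq!(min_indent, 8);
// Dropping the common prefix keeps only *relative* indentation, which is
// what each buffer window is compared against.
let dedented = drop_lines_prefix(&lines, min_indent);
assert_eq!(dedented, vec!["let a = 1;", "    let b = 2;"]);
```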

View File

@@ -0,0 +1,32 @@
use anyhow::Result;
use handlebars::Handlebars;
use rust_embed::RustEmbed;
use serde::Serialize;
use std::sync::Arc;
#[derive(RustEmbed)]
#[folder = "src/templates"]
#[include = "*.hbs"]
struct Assets;
pub struct Templates(Handlebars<'static>);
impl Templates {
pub fn new() -> Arc<Self> {
let mut handlebars = Handlebars::new();
handlebars.register_embed_templates::<Assets>().unwrap();
handlebars.register_escape_fn(|text| text.into());
Arc::new(Self(handlebars))
}
}
pub trait Template: Sized {
const TEMPLATE_NAME: &'static str;
fn render(&self, templates: &Templates) -> Result<String>
where
Self: Serialize + Sized,
{
Ok(templates.0.render(Self::TEMPLATE_NAME, self)?)
}
}
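A hedged usage sketch of the `Template` trait above; `EditPromptTemplate` and the `edit_prompt.hbs` asset name are hypothetical stand-ins, and the embedded template name is assumed to keep its `.hbs` extension:

```rust
// A hypothetical sketch: `EditPromptTemplate` and "edit_prompt.hbs" are
// stand-in names, assuming register_embed_templates keeps the extension.
use serde::Serialize;

#[derive(Serialize)]
struct EditPromptTemplate {
    path: String,
    file_content: String,
    instructions: String,
}

impl Template for EditPromptTemplate {
    const TEMPLATE_NAME: &'static str = "edit_prompt.hbs";
}

fn render_prompt(templates: &Templates) -> anyhow::Result<String> {
    EditPromptTemplate {
        path: "src/main.rs".into(),
        file_content: "fn main() {}".into(),
        instructions: "Add a greeting".into(),
    }
    .render(templates)
}
```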

View File

@@ -0,0 +1,46 @@
You are an expert text editor. Given the following file as input:
```{{path}}
{{file_content}}
```
Your response must be a series of edits in the following format:
<edits>
<old_text>
OLD TEXT 1 HERE
</old_text>
<new_text>
NEW TEXT 1 HERE
</new_text>
<old_text>
OLD TEXT 2 HERE
</old_text>
<new_text>
NEW TEXT 2 HERE
</new_text>
<old_text>
OLD TEXT 3 HERE
</old_text>
<new_text>
NEW TEXT 3 HERE
</new_text>
</edits>
Rules for editing:
- `old_text` represents full lines (including indentation) in the input file that will be replaced with `new_text`.
- It is crucial that `old_text` is unique and unambiguous.
- Always include enough context around the lines you want to replace in `old_text` such that it's impossible to mistake them for other lines.
- If you want to replace many occurrences of the same text, repeat the same `old_text`/`new_text` pair multiple times and I will apply them sequentially, one occurrence at a time.
- Don't explain why you made a change, just report the edits.
- Never do MORE than what the user has requested.
- Never do LESS than what the user has requested.
<user_instructions>
{{instructions}}
</user_instructions>
<edits>
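The template's one-occurrence-at-a-time rule maps naturally onto a sequential apply loop; a minimal sketch with a hypothetical helper, not this crate's actual parser or apply path:

```rust
// A minimal sketch of applying parsed (old_text, new_text) pairs
// sequentially, replacing one occurrence at a time as the prompt above
// promises. This is a hypothetical helper, not this crate's parser.
fn apply_edits(mut file: String, edits: &[(String, String)]) -> Option<String> {
    for (old, new) in edits {
        // `find` locates the first occurrence only, so a repeated
        // old/new pair advances through duplicates one match per edit.
        let start = file.find(old.as_str())?;
        file.replace_range(start..start + old.len(), new);
    }
    Some(file)
}
```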

View File

@@ -32,6 +32,7 @@ language_models.workspace = true
languages = { workspace = true, features = ["load-grammars"] }
node_runtime.workspace = true
paths.workspace = true
pretty_assertions.workspace = true
project.workspace = true
prompt_store.workspace = true
regex.workspace = true
@@ -48,6 +49,7 @@ unindent.workspace = true
util.workspace = true
uuid = { version = "1.6", features = ["v4"] }
workspace-hack.workspace = true
[[bin]]
name = "eval"
path = "src/eval.rs"

View File

@@ -160,7 +160,11 @@ impl ExampleContext {
if left == right {
Ok(())
} else {
println!("{}{:#?} != {:#?}", self.log_prefix, left, right);
println!(
"{}{}",
self.log_prefix,
pretty_assertions::Comparison::new(&left, &right)
);
Err(anyhow::Error::from(FailedAssertion(message.clone())))
},
message,
@@ -333,8 +337,8 @@ impl ExampleContext {
}
pub fn edits(&self) -> HashMap<Arc<Path>, FileEdits> {
self.app
.read_entity(&self.agent_thread, |thread, cx| {
self.agent_thread
.read_with(&self.app, |thread, cx| {
let action_log = thread.action_log().read(cx);
HashMap::from_iter(action_log.changed_buffers(cx).into_iter().map(
|(buffer, diff)| {
@@ -413,16 +417,16 @@ impl ToolUse {
}
}
#[derive(Debug)]
#[derive(Debug, Eq, PartialEq)]
pub struct FileEdits {
hunks: Vec<FileEditHunk>,
pub hunks: Vec<FileEditHunk>,
}
#[derive(Debug)]
struct FileEditHunk {
base_text: String,
text: String,
status: DiffHunkStatus,
#[derive(Debug, Eq, PartialEq)]
pub struct FileEditHunk {
pub base_text: String,
pub text: String,
pub status: DiffHunkStatus,
}
impl FileEdits {
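For context, `pretty_assertions::Comparison` renders a line-by-line diff of two `Debug` values, which is what replaces the plain `!=` print above; a minimal sketch of the call pattern:

```rust
// A minimal sketch of the Comparison usage above: it implements Display
// and prints a line-by-line diff of the two Debug representations.
use pretty_assertions::Comparison;

let left = vec![1, 2, 3];
let right = vec![1, 2, 4];
if left != right {
    println!("{}", Comparison::new(&left, &right));
}
```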

View File

@@ -0,0 +1,5 @@
mod delete_run_git_blame;
mod extract_handle_command_output;
pub use delete_run_git_blame::*;
pub use extract_handle_command_output::*;

View File

@@ -0,0 +1,33 @@
use crate::example::{Example, ExampleContext, ExampleMetadata};
use anyhow::Result;
use async_trait::async_trait;
pub struct DeleteRunGitBlame;
#[async_trait(?Send)]
impl Example for DeleteRunGitBlame {
fn meta(&self) -> ExampleMetadata {
ExampleMetadata {
name: "edit::delete_run_git_blame".to_string(),
url: "https://github.com/zed-industries/zed.git".to_string(),
revision: "58604fba86ebbffaa01f7c6834253e33bcd38c0f".to_string(),
language_server: None,
max_assertions: None,
}
}
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
cx.push_user_message(format!(
r#"
Read the `crates/git/src/blame.rs` file and delete `run_git_blame`. Just that
one function, not its usages.
IMPORTANT: You are only allowed to use the `read_file` and `edit_file` tools!
"#
));
cx.run_to_end().await?;
// todo!("add assertions")
Ok(())
}
}
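The assertions are still a `todo!`; a hypothetical sketch of what one could look like, using the `edits` accessor and the `FileEdits`/`FileEditHunk` fields made public elsewhere in this diff:

```rust
// A hypothetical assertion sketch for the todo!() above: check that the
// agent edited only blame.rs and that some hunk removed run_git_blame.
let edits = cx.edits();
assert_eq!(edits.len(), 1, "expected edits to exactly one file");
let (path, file_edits) = edits.iter().next().unwrap();
assert!(path.ends_with("crates/git/src/blame.rs"));
assert!(
    file_edits
        .hunks
        .iter()
        .any(|hunk| hunk.base_text.contains("async fn run_git_blame")),
    "expected a hunk whose base text contains run_git_blame"
);
```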

View File

@@ -0,0 +1,33 @@
use crate::example::{Example, ExampleContext, ExampleMetadata};
use anyhow::Result;
use async_trait::async_trait;
pub struct ExtractHandleCommandOutput;
#[async_trait(?Send)]
impl Example for ExtractHandleCommandOutput {
fn meta(&self) -> ExampleMetadata {
ExampleMetadata {
name: "edit::extract_handle_command_output".to_string(),
url: "https://github.com/zed-industries/zed.git".to_string(),
revision: "58604fba86ebbffaa01f7c6834253e33bcd38c0f".to_string(),
language_server: None,
max_assertions: None,
}
}
async fn conversation(&self, cx: &mut ExampleContext) -> Result<()> {
cx.push_user_message(format!(
r#"
Read the `crates/git/src/blame.rs` file and extract a method in the final stanza of
`run_git_blame` to deal with command failures, call it `handle_command_output`.
IMPORTANT: You are only allowed to use the `read_file` and `edit_file` tools!
"#
));
cx.run_to_end().await?;
// todo!("add assertions")
Ok(())
}
}

View File

@@ -12,12 +12,15 @@ use util::serde::default_true;
use crate::example::{Example, ExampleContext, ExampleMetadata, JudgeAssertion};
mod add_arg_to_trait_method;
mod edit;
mod file_search;
pub fn all(examples_dir: &Path) -> Vec<Rc<dyn Example>> {
let mut threads: Vec<Rc<dyn Example>> = vec![
Rc::new(file_search::FileSearchExample),
Rc::new(add_arg_to_trait_method::AddArgToTraitMethod),
Rc::new(edit::DeleteRunGitBlame),
Rc::new(edit::ExtractHandleCommandOutput),
];
for example_path in list_declarative_examples(examples_dir).unwrap() {

View File

@@ -48,7 +48,10 @@ pub async fn stream_generate_content(
if let Some(line) = line.strip_prefix("data: ") {
match serde_json::from_str(line) {
Ok(response) => Some(Ok(response)),
Err(error) => Some(Err(anyhow!(error))),
Err(error) => Some(Err(anyhow!(format!(
"Error parsing JSON: {:?}\n{:?}",
error, line
)))),
}
} else {
None
@@ -152,6 +155,7 @@ pub struct GenerateContentCandidate {
#[derive(Debug, Serialize, Deserialize)]
#[serde(rename_all = "camelCase")]
pub struct Content {
#[serde(default)]
pub parts: Vec<Part>,
pub role: Role,
}
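The `data: ` prefix check above follows the server-sent-events framing of the streaming endpoint; a minimal sketch of that line filter, using `serde_json::Value` as a stand-in for the response type:

```rust
// A minimal sketch of SSE line filtering like the code above: keep only
// `data: `-prefixed lines, and include the raw payload in parse errors.
fn parse_sse_line(line: &str) -> Option<anyhow::Result<serde_json::Value>> {
    let payload = line.strip_prefix("data: ")?;
    match serde_json::from_str(payload) {
        Ok(value) => Some(Ok(value)),
        Err(error) => Some(Err(anyhow::anyhow!(
            "Error parsing JSON: {:?}\n{:?}",
            error, payload
        ))),
    }
}
```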

View File

@@ -79,7 +79,7 @@ pub(crate) use test::*;
pub(crate) use windows::*;
#[cfg(any(test, feature = "test-support"))]
pub use test::TestScreenCaptureSource;
pub use test::{TestDispatcher, TestScreenCaptureSource};
/// Returns a background executor for the current platform.
pub fn background_executor() -> BackgroundExecutor {

View File

@@ -3,7 +3,7 @@ mod display;
mod platform;
mod window;
pub(crate) use dispatcher::*;
pub use dispatcher::*;
pub(crate) use display::*;
pub(crate) use platform::*;
pub(crate) use window::*;