Compare commits

...

2 Commits

Author SHA1 Message Date
Oleksiy Syvokon
c6d38502f2 Update edit_agent evals results 2025-05-28 22:36:01 +03:00
Nathan Sobo
c3cb5a0579 WIP: A bunch of manually tested prompt changes that make Gemini 2.5 Pro
work better for me
2025-05-28 11:04:55 -06:00
7 changed files with 41 additions and 17 deletions

View File

@@ -617,10 +617,10 @@ impl EditAgent {
conversation.messages.pop();
}
} else {
debug_panic!(
"Last message must be an Assistant tool calling! Got {:?}",
last_message.content
);
// debug_panic!(
// "Last message must be an Assistant tool calling! Got {:?}",
// last_message.content
// );
}
}
@@ -651,7 +651,7 @@ impl EditAgent {
tool_choice,
tools,
stop: Vec::new(),
temperature: None,
temperature: Some(0.2),
};
Ok(self.model.stream_completion_text(request, cx).await?.stream)

View File

@@ -39,7 +39,7 @@ fn eval_extract_handle_command_output() {
// Model | Pass rate
// ----------------------------|----------
// claude-3.7-sonnet | 0.98
// gemini-2.5-pro | 0.86
// gemini-2.5-pro | 0.98
// gemini-2.5-flash | 0.11
// gpt-4.1 | 1.00
@@ -109,6 +109,10 @@ fn eval_extract_handle_command_output() {
#[test]
#[cfg_attr(not(feature = "eval"), ignore)]
fn eval_delete_run_git_blame() {
// Model | Pass rate
// ----------------------------|----------
// claude-3.7-sonnet | 1.0
// gemini-2.5-pro | 1.0
let input_file_path = "root/blame.rs";
let input_file_content = include_str!("evals/fixtures/delete_run_git_blame/before.rs");
let output_file_content = include_str!("evals/fixtures/delete_run_git_blame/after.rs");
@@ -168,7 +172,7 @@ fn eval_translate_doc_comments() {
// Model | Pass rate
// ============================================
//
// claude-3.7-sonnet |
// claude-3.7-sonnet | 1.0
// gemini-2.5-pro-preview-03-25 | 1.0
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
@@ -231,7 +235,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
// ============================================
//
// claude-3.7-sonnet | 0.98
// gemini-2.5-pro-preview-03-25 | 0.99
// gemini-2.5-pro-preview-03-25 | 0.97
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/lib.rs";
@@ -439,7 +443,7 @@ fn eval_from_pixels_constructor() {
// ============================================
//
// claude-3.7-sonnet |
// gemini-2.5-pro-preview-03-25 | 0.94
// gemini-2.5-pro-preview-03-25 | 0.85
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/canvas.rs";
@@ -745,7 +749,7 @@ fn eval_add_overwrite_test() {
// ============================================
//
// claude-3.7-sonnet | 0.16
// gemini-2.5-pro-preview-03-25 | 0.35
// gemini-2.5-pro-preview-03-25 | 0.40
// gemini-2.5-flash-preview-04-17 |
// gpt-4.1 |
let input_file_path = "root/action_log.rs";

View File

@@ -167,7 +167,7 @@ impl StreamingFuzzyMatcher {
let matched_buffer_row_count = buffer_row_end - buffer_row_start;
let matched_ratio = matched_lines as f32
/ (matched_buffer_row_count as f32).max(new_query_line_count as f32);
if matched_ratio >= 0.8 {
if matched_ratio >= 0.9 {
let buffer_start_ix = self
.snapshot
.point_to_offset(Point::new(buffer_row_start, 0));

View File

@@ -59,6 +59,11 @@ pub struct EditFileToolInput {
/// The full path of the file to create or modify in the project.
///
/// You MUST read the actual text that you intend to edit from a file at this path
/// with the read_file tool before calling the edit_file tool. If the read_file tool
/// returned an outline, you MUST call read_file again to get literal text before
/// calling edit_file.
///
/// WARNING: When specifying which file path needs changing, you MUST
/// start each path with one of the project's root directories.
///

View File

@@ -2,7 +2,7 @@ This is a tool for creating a new file or editing an existing file. For moving o
Before using this tool:
1. Use the `read_file` tool to understand the file's contents and context
1. ALWAYS use the `read_file` tool and verify the literal text you plan to edit. If calling `read_file` gives you an outline, call it again to get literal text BEFORE you edit.
2. Verify the directory path is correct (only applicable when creating new files):
- Use the `list_directory` tool to verify the parent directory exists and is the correct location

View File

@@ -233,9 +233,9 @@ impl Tool for ReadFileTool {
{outline}
Using the line numbers in this outline, you can call this tool again
while specifying the start_line and end_line fields to see the
implementations of symbols in the outline."
Using the line numbers in this outline, call read_file again
and specify `start_line` and `end_line` to see actual text from
this file. Do not attempt to edit based on this outline alone."
}
.into())
}

View File

@@ -41,11 +41,12 @@ NEW TEXT 3 HERE
- Edits are sequential - each assumes previous edits are already applied
- Only edit the specified file
- Always close all tags properly
- Do not escape your output
- When deleting text, leave <new_text> empty
{{!-- The following example adds almost 10% pass rate for Gemini 2.5.
Claude and gpt-4.1 don't really need it. --}}
<example>
<example description="adding a field">
<edits>
<old_text>
@@ -76,9 +77,23 @@ struct User {
};
</new_text>
</edits>
</example>
<example description="deleting a struct">
<edits>
<old_text>
struct User {
name: String,
email: String,
active: bool,
}
</old_text>
<new_text>
</new_text>
</edits>
</example>
<file_to_edit>
{{path}}