Simplify synthesis further

This commit is contained in:
Max Brunsfeld
2025-12-30 16:28:50 -08:00
parent 196c488ed4
commit 8953b487ad
3 changed files with 20 additions and 132 deletions

View File

@@ -1,7 +1,7 @@
use anthropic::{
ANTHROPIC_API_URL, Event, Message, Request as AnthropicRequest, RequestContent,
Response as AnthropicResponse, ResponseContent, Role, Tool, ToolChoice,
non_streaming_completion, stream_completion,
Response as AnthropicResponse, ResponseContent, Role, non_streaming_completion,
stream_completion,
};
use anyhow::Result;
use futures::StreamExt as _;
@@ -131,106 +131,6 @@ impl PlainLlmClient {
Ok(response)
}
/// Streams a completion request that may invoke tools and assembles the final response.
///
/// Text deltas are concatenated into a single `ResponseContent::Text` block, and each
/// tool call is rebuilt from its streamed JSON fragments into a
/// `ResponseContent::ToolUse` block. The text block (if any) is appended to the
/// response content first, followed by the tool-use blocks in the order they finished.
///
/// `on_progress` is invoked after every text or tool-input delta with the length and
/// contents of the buffer that just grew (the accumulated text, or the partial JSON
/// input of the tool call currently being streamed).
///
/// # Errors
///
/// Returns an error if the request fails, if the stream yields an error, if the stream
/// ends without a `MessageStart` event, or if a tool call's streamed input is
/// non-empty but not valid JSON.
pub async fn generate_with_tools<F>(
    &self,
    model: &str,
    max_tokens: u64,
    messages: Vec<Message>,
    tools: Vec<Tool>,
    tool_choice: Option<ToolChoice>,
    mut on_progress: F,
) -> Result<AnthropicResponse>
where
    F: FnMut(usize, &str),
{
    let request = AnthropicRequest {
        model: model.to_string(),
        max_tokens,
        messages,
        tools,
        thinking: None,
        tool_choice,
        system: None,
        metadata: None,
        stop_sequences: Vec::new(),
        temperature: None,
        top_k: None,
        top_p: None,
    };

    let mut stream = stream_completion(
        self.http_client.as_ref(),
        ANTHROPIC_API_URL,
        &self.api_key,
        request,
        None,
    )
    .await
    .map_err(|e| anyhow::anyhow!("{:?}", e))?;

    let mut response: Option<AnthropicResponse> = None;
    let mut text_content = String::new();
    let mut tool_use_blocks: Vec<ResponseContent> = Vec::new();
    // The tool call currently being streamed, as `(id, name, partial JSON input)`.
    // `None` while no tool-use content block is open.
    let mut current_tool: Option<(String, String, String)> = None;

    while let Some(event_result) = stream.next().await {
        let event = event_result.map_err(|e| anyhow::anyhow!("{:?}", e))?;
        match event {
            Event::MessageStart { message } => {
                response = Some(message);
            }
            Event::ContentBlockStart { content_block, .. } => {
                if let ResponseContent::ToolUse { id, name, .. } = content_block {
                    current_tool = Some((id, name, String::new()));
                }
            }
            Event::ContentBlockDelta { delta, .. } => match delta {
                anthropic::ContentDelta::TextDelta { text } => {
                    text_content.push_str(&text);
                    on_progress(text_content.len(), &text_content);
                }
                anthropic::ContentDelta::InputJsonDelta { partial_json } => {
                    // JSON fragments are only meaningful while a tool-use block is open.
                    if let Some((_, _, input_json)) = current_tool.as_mut() {
                        input_json.push_str(&partial_json);
                        on_progress(input_json.len(), input_json.as_str());
                    }
                }
                _ => {}
            },
            Event::ContentBlockStop { .. } => {
                if let Some((id, name, input_json)) = current_tool.take() {
                    let input = if input_json.trim().is_empty() {
                        // A tool invoked without arguments streams no JSON at all;
                        // represent that as `{}` instead of parsing an empty string.
                        serde_json::Value::Object(Default::default())
                    } else {
                        // Surface malformed or truncated input rather than silently
                        // replacing it with `null`.
                        serde_json::from_str(&input_json).map_err(|e| {
                            anyhow::anyhow!("invalid JSON input for tool `{}`: {}", name, e)
                        })?
                    };
                    tool_use_blocks.push(ResponseContent::ToolUse { id, name, input });
                }
            }
            _ => {}
        }
    }

    let mut response = response.ok_or_else(|| anyhow::anyhow!("No response received"))?;

    if !text_content.is_empty() {
        response
            .content
            .push(ResponseContent::Text { text: text_content });
    }
    response.content.extend(tool_use_blocks);

    Ok(response)
}
}
pub struct BatchingLlmClient {

View File

@@ -17,6 +17,8 @@ pub static RUN_DIR: LazyLock<PathBuf> = LazyLock::new(|| {
.join(chrono::Local::now().format("%d-%m-%y-%H_%M_%S").to_string())
});
/// The `latest` entry inside `DATA_DIR`.
// NOTE(review): presumably a link to the most recent example run directory — confirm against its users.
pub static LATEST_EXAMPLE_RUN_DIR: LazyLock<PathBuf> = LazyLock::new(|| DATA_DIR.join("latest"));
/// The `latest_failed` entry inside `DATA_DIR`; `run_synthesize` recreates it as a
/// symlink to `FAILED_EXAMPLES_DIR`.
pub static LATEST_FAILED_EXAMPLES_DIR: LazyLock<PathBuf> =
LazyLock::new(|| DATA_DIR.join("latest_failed"));
/// Path of the `llm_cache.sqlite` file inside `CACHE_DIR`.
pub static LLM_CACHE_DB: LazyLock<PathBuf> = LazyLock::new(|| CACHE_DIR.join("llm_cache.sqlite"));
/// Path of the `synthesize_state.json` file inside `DATA_DIR`.
pub static SYNTHESIZE_STATE_FILE: LazyLock<PathBuf> =
LazyLock::new(|| DATA_DIR.join("synthesize_state.json"));

View File

@@ -1,7 +1,7 @@
use crate::{
anthropic_client::PlainLlmClient,
git::{ensure_repo_cloned, run_git},
paths::{FAILED_EXAMPLES_DIR, SYNTHESIZE_STATE_FILE},
paths::{FAILED_EXAMPLES_DIR, LATEST_FAILED_EXAMPLES_DIR, SYNTHESIZE_STATE_FILE},
progress::{InfoStyle, Progress, Step, StepProgress},
};
use anthropic::ResponseContent;
@@ -99,6 +99,15 @@ pub async fn run_synthesize(config: SynthesizeConfig) -> Result<()> {
std::fs::create_dir_all(&config.output_dir)?;
std::fs::create_dir_all(&*FAILED_EXAMPLES_DIR)?;
// Create "latest_failed" symlink pointing to this run's failed directory
if LATEST_FAILED_EXAMPLES_DIR.is_symlink() {
std::fs::remove_file(&*LATEST_FAILED_EXAMPLES_DIR)?;
}
#[cfg(unix)]
std::os::unix::fs::symlink(&*FAILED_EXAMPLES_DIR, &*LATEST_FAILED_EXAMPLES_DIR)?;
#[cfg(windows)]
std::os::windows::fs::symlink_dir(&*FAILED_EXAMPLES_DIR, &*LATEST_FAILED_EXAMPLES_DIR)?;
let progress = Progress::global();
progress.set_total_examples(config.count);
@@ -361,7 +370,7 @@ fn build_prompt(config: &SynthesizeConfig, commit: &CommitInfo) -> String {
ANALYSIS:
Pattern: <one sentence describing the pattern>
Plan - I will output these hunks:
Steps:
1. <file:line-range> - <what this hunk does>
2. <file:line-range> - <what this hunk does>
3. <file:line-range> - <what this hunk does>
@@ -369,9 +378,6 @@ fn build_prompt(config: &SynthesizeConfig, commit: &CommitInfo) -> String {
NAME: <short description, like a commit message, under 60 chars>
REASONING:
<2-4 sentences explaining the pattern and why the expected patch follows from edit history>
EDIT_HISTORY:
Hunk 1:
@@ -468,14 +474,9 @@ async fn analyze_commit(
}];
let response = client
.generate_streaming(
"claude-sonnet-4-5",
8192,
messages,
|chars, _text| {
step_progress.set_substatus(format!("analyzing: {:.1}K", chars as f64 / 1000.0));
},
)
.generate_streaming("claude-sonnet-4-5", 8192, messages, |chars, _text| {
step_progress.set_substatus(format!("analyzing: {:.1}K", chars as f64 / 1000.0));
})
.await?;
// Extract text content from response
@@ -508,29 +509,14 @@ fn parse_claude_response(response: &str) -> Result<Option<ClaudeResponse>> {
.map(|l| l.strip_prefix("NAME:").unwrap_or("").trim().to_string())
.unwrap_or_else(|| "unnamed example".to_string());
// Parse ANALYSIS section (Claude's planning)
let analysis = extract_section(
// Parse ANALYSIS section (Claude's planning) - this is the primary reasoning
let reasoning = extract_section(
response,
"ANALYSIS:",
&["NAME:", "REASONING:", "EDIT_HISTORY:", "EXPECTED_PATCH:"],
)
.unwrap_or_default();
// Parse REASONING section
let reasoning_text = extract_section(
response,
"REASONING:",
&["EDIT_HISTORY:", "EXPECTED_PATCH:"],
)
.unwrap_or_default();
// Combine analysis and reasoning
let reasoning = if analysis.is_empty() {
reasoning_text
} else {
format!("{}\n\n{}", analysis, reasoning_text)
};
// Parse EDIT_HISTORY diff block
let edit_history_hunks = extract_diff_block(response, "EDIT_HISTORY:")?;