Compare commits

...

7 Commits

Author SHA1 Message Date
Richard Feldman 525a5505a0 Add retry policy for Refusal and MaxTokens 2025-11-18 14:27:33 -05:00
Richard Feldman 3180de3bc2 Convert Refusal and MaxTokens to LanguageModelCompletionError 2025-11-18 14:25:51 -05:00
Richard Feldman 2c917cc05b Add a comment 2025-11-18 14:25:40 -05:00
Richard Feldman 525f65855e Add Refusal and MaxTokens to LanguageModelCompletionError 2025-11-18 14:25:22 -05:00
Richard Feldman 67e6071ebc Remove unused import 2025-11-18 14:21:26 -05:00
Richard Feldman 145b8c31dc Use the current non-Option model 2025-11-18 14:19:35 -05:00
Richard Feldman c2a4afdaef Don't swallow upstream HTTP errors 2025-11-18 14:19:20 -05:00
2 changed files with 52 additions and 13 deletions

View File

@@ -1252,9 +1252,17 @@ impl Thread {
                 log::trace!("Received completion event: {:?}", event);
                 match event {
                     Ok(event) => {
-                        tool_results.extend(this.update(cx, |this, cx| {
-                            this.handle_completion_event(event, event_stream, cx)
-                        })??);
+                        match this.update(cx, |this, cx| {
+                            this.handle_completion_event(event, event_stream, model.clone(), cx)
+                        })? {
+                            Ok(answer) => {
+                                tool_results.extend(answer);
+                            }
+                            Err(err) => {
+                                error = Some(err);
+                                break;
+                            }
+                        }
                     }
                     Err(err) => {
                         error = Some(err);
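
Note: the rewrite above splits what was a single `??` into two explicit layers. The outer `Result` from `this.update` still propagates with `?` (the entity being gone is unrecoverable), while the inner, newly typed completion error is recorded and breaks the loop instead of being flattened away. A minimal self-contained sketch of that inner pattern, with stand-in types (the real event and error types are richer):

#[derive(Debug)]
enum CompletionError {
    Refusal,
}

fn handle_event(event: u32) -> Result<Option<u32>, CompletionError> {
    match event {
        0 => Err(CompletionError::Refusal),
        n => Ok(Some(n)),
    }
}

fn drain(events: Vec<u32>) -> (Vec<u32>, Option<CompletionError>) {
    let mut tool_results = Vec::new();
    let mut error = None;
    for event in events {
        match handle_event(event) {
            Ok(answer) => tool_results.extend(answer), // an Option yields zero or one items
            Err(err) => {
                error = Some(err); // keep the error instead of swallowing it
                break;             // stop consuming further events
            }
        }
    }
    (tool_results, error)
}

fn main() {
    let (results, error) = drain(vec![1, 2, 0, 3]);
    assert_eq!(results, vec![1, 2]);
    println!("stopped with {:?}", error); // Some(Refusal); 3 was never processed
}
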
@@ -1383,8 +1391,10 @@ impl Thread {
         &mut self,
         event: LanguageModelCompletionEvent,
         event_stream: &ThreadEventStream,
+        model: Arc<dyn LanguageModel>,
         cx: &mut Context<Self>,
-    ) -> Result<Option<Task<LanguageModelToolResult>>> {
+    ) -> std::result::Result<Option<Task<LanguageModelToolResult>>, LanguageModelCompletionError>
+    {
         log::trace!("Handling streamed completion event: {:?}", event);
         use LanguageModelCompletionEvent::*;
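
One detail worth calling out: the new return type is spelled `std::result::Result` because the old signature's one-parameter `Result<T>` indicates an anyhow-style alias is in scope, which leaves no room for a custom error type. A small illustration of that shadowing, assuming the anyhow and thiserror crates (both already used in this codebase); the error type here is a stand-in:

use anyhow::Result; // from here on, `Result<T>` means `Result<T, anyhow::Error>`

#[derive(Debug, thiserror::Error)]
#[error("typed failure")]
struct TypedError;

// Uses the imported alias: the error type is anyhow::Error.
fn loosely_typed() -> Result<()> {
    Ok(())
}

// A caller-matchable error type requires the fully qualified two-parameter form.
fn strictly_typed() -> std::result::Result<(), TypedError> {
    Err(TypedError)
}

fn main() {
    let _ = loosely_typed();
    println!("{:?}", strictly_typed()); // Err(TypedError)
}
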
@@ -1421,8 +1431,8 @@ impl Thread {
                     "Agent Thread Completion Usage Updated",
                     thread_id = self.id.to_string(),
                     prompt_id = self.prompt_id.to_string(),
-                    model = self.model.as_ref().map(|m| m.telemetry_id()),
-                    model_provider = self.model.as_ref().map(|m| m.provider_id().to_string()),
+                    model = model.telemetry_id(),
+                    model_provider = model.provider_id().to_string(),
                     input_tokens = usage.input_tokens,
                     output_tokens = usage.output_tokens,
                     cache_creation_input_tokens = usage.cache_creation_input_tokens,
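
This hunk is the "Use the current non-Option model" commit: telemetry reads the `model` parameter directly instead of mapping over `self.model`, so the two fields are plain values rather than `Option`s that could silently report `None`. Reduced to its essentials (trait and return types are stand-ins):

use std::sync::Arc;

trait LanguageModel {
    fn telemetry_id(&self) -> String;
    fn provider_id(&self) -> String;
}

// Before: telemetry read an Option field, yielding Option<String> values.
struct Before {
    model: Option<Arc<dyn LanguageModel>>,
}

impl Before {
    fn usage_fields(&self) -> (Option<String>, Option<String>) {
        (
            self.model.as_ref().map(|m| m.telemetry_id()),
            self.model.as_ref().map(|m| m.provider_id()),
        )
    }
}

// After: the model is threaded in as a parameter, so the fields can't be None.
fn usage_fields(model: &Arc<dyn LanguageModel>) -> (String, String) {
    (model.telemetry_id(), model.provider_id())
}
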
@@ -1434,16 +1444,31 @@ impl Thread {
                 self.update_model_request_usage(amount, limit, cx);
             }
             StatusUpdate(
-                CompletionRequestStatus::Started
-                | CompletionRequestStatus::Queued { .. }
-                | CompletionRequestStatus::Failed { .. },
-            ) => {}
+                CompletionRequestStatus::Started | CompletionRequestStatus::Queued { .. },
+            ) => {
+                // No action needed for starting or queueing
+            }
+            StatusUpdate(CompletionRequestStatus::Failed {
+                code,
+                message,
+                request_id,
+                retry_after,
+            }) => {
+                return Err(LanguageModelCompletionError::from_cloud_failure(
+                    model.provider_name(),
+                    code,
+                    message,
+                    retry_after.map(Duration::from_secs_f64),
+                ));
+            }
             StatusUpdate(CompletionRequestStatus::ToolUseLimitReached) => {
                 self.tool_use_limit_reached = true;
             }
-            Stop(StopReason::Refusal) => return Err(CompletionError::Refusal.into()),
-            Stop(StopReason::MaxTokens) => return Err(CompletionError::MaxTokens.into()),
-            Stop(StopReason::ToolUse | StopReason::EndTurn) => {}
+            Stop(StopReason::Refusal) => return Err(LanguageModelCompletionError::Refusal),
+            Stop(StopReason::MaxTokens) => return Err(LanguageModelCompletionError::MaxTokens),
+            Stop(StopReason::ToolUse | StopReason::EndTurn) => {
+                // Tool use will be handled separately, and ending the turn needs no action
+            }
         }
         Ok(None)
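
The `Failed` status is no longer matched away silently; it is destructured and converted into a typed error, which is what the "Don't swallow upstream HTTP errors" commit refers to. The body of `from_cloud_failure` is not part of this diff; a plausible sketch of such a constructor, with an entirely hypothetical set of failure codes and a simplified error enum, might look like:

use std::time::Duration;

#[derive(Debug)]
enum CompletionError {
    RateLimitExceeded { retry_after: Option<Duration> },
    Refusal,
    MaxTokens,
    Other(String),
}

impl CompletionError {
    // Hypothetical mapping; the real constructor also takes the provider name
    // as its own type and certainly recognizes a different set of codes.
    fn from_cloud_failure(
        provider: &str,
        code: String,
        message: String,
        retry_after: Option<Duration>,
    ) -> Self {
        match code.as_str() {
            "rate_limit_exceeded" => Self::RateLimitExceeded { retry_after },
            "refusal" => Self::Refusal,
            "max_tokens" => Self::MaxTokens,
            _ => Self::Other(format!("{provider}: {code}: {message}")),
        }
    }
}

fn main() {
    // The wire format carries seconds as an f64; Duration::from_secs_f64 is
    // the same conversion used at the call site above.
    let err = CompletionError::from_cloud_failure(
        "example-provider",
        "rate_limit_exceeded".into(),
        "slow down".into(),
        Some(30.0_f64).map(Duration::from_secs_f64),
    );
    println!("{err:?}");
}
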
@@ -2133,6 +2158,14 @@ impl Thread {
                 delay: BASE_RETRY_DELAY,
                 max_attempts: 2,
             }),
+            Refusal => {
+                // If the model refused the request, we can reasonably assume it will refuse on retry too.
+                None
+            }
+            MaxTokens => {
+                // Retrying won't help with having exceeded max tokens
+                None
+            }
         }
     }
 }
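
This is the retry-policy half of the change: the match that produces a retry strategy gains explicit arms for the two new variants, and both deliberately return `None`, since resending the identical request cannot change a refusal or a token-limit overflow. A reduced sketch of the shape (variant names and the delay constant are approximated from the visible context):

use std::time::Duration;

const BASE_RETRY_DELAY: Duration = Duration::from_secs(5); // assumed value

#[derive(Debug)]
struct RetryStrategy {
    delay: Duration,
    max_attempts: u32,
}

#[derive(Debug)]
enum CompletionError {
    UpstreamHttp, // stand-in for a transient provider failure
    Refusal,
    MaxTokens,
}

fn retry_strategy(error: &CompletionError) -> Option<RetryStrategy> {
    use CompletionError::*;
    match error {
        // Transient failures are worth another attempt after a delay.
        UpstreamHttp => Some(RetryStrategy {
            delay: BASE_RETRY_DELAY,
            max_attempts: 2,
        }),
        // Deterministic outcomes: retrying the same request cannot help.
        Refusal | MaxTokens => None,
    }
}

fn main() {
    assert!(retry_strategy(&CompletionError::UpstreamHttp).is_some());
    assert!(retry_strategy(&CompletionError::Refusal).is_none());
    assert!(retry_strategy(&CompletionError::MaxTokens).is_none());
}
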

View File

@@ -176,6 +176,12 @@ pub enum LanguageModelCompletionError {
         error: serde_json::Error,
     },
+    #[error("The model refused to complete the request, possibly because of a security policy.")]
+    Refusal,
+    #[error("The model could not respond because its maximum number of tokens has been exceeded.")]
+    MaxTokens,
     // TODO: Ideally this would be removed in favor of having a comprehensive list of errors.
     #[error(transparent)]
     Other(#[from] anyhow::Error),
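
For reference, the two new variants rely on thiserror: the `#[error(...)]` string becomes the variant's `Display` output, and `#[error(transparent)]` defers `Display` and `source()` to the wrapped `anyhow::Error`. A self-contained illustration (enum name shortened here):

use thiserror::Error;

#[derive(Debug, Error)]
enum CompletionError {
    #[error("The model refused to complete the request, possibly because of a security policy.")]
    Refusal,
    #[error("The model could not respond because its maximum number of tokens has been exceeded.")]
    MaxTokens,
    #[error(transparent)]
    Other(#[from] anyhow::Error),
}

fn main() {
    // Display renders the attribute string for the unit variants...
    println!("{}", CompletionError::Refusal);
    println!("{}", CompletionError::MaxTokens);
    // ...while `transparent` passes through the inner error's message.
    println!("{}", CompletionError::from(anyhow::anyhow!("upstream HTTP 500")));
}
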