Set cache breakpoint on second-to-last message (#27632)
Here's a sample `dbg!` of token usage after this change, for a small
agent thread:
```
[crates/assistant2/src/thread.rs:1092:25] &usage = TokenUsage {
input_tokens: 5354,
output_tokens: 184,
cache_creation_input_tokens: 0,
cache_read_input_tokens: 0,
}
[crates/assistant2/src/thread.rs:1092:25] &usage = TokenUsage {
input_tokens: 54,
output_tokens: 132,
cache_creation_input_tokens: 5518,
cache_read_input_tokens: 0,
}
[crates/assistant2/src/thread.rs:1092:25] &usage = TokenUsage {
input_tokens: 54,
output_tokens: 113,
cache_creation_input_tokens: 166,
cache_read_input_tokens: 5518,
}
[crates/assistant2/src/thread.rs:1092:25] &usage = TokenUsage {
input_tokens: 291,
output_tokens: 181,
cache_creation_input_tokens: 147,
cache_read_input_tokens: 5684,
}
```
Release Notes:
- N/A
This commit is contained in:
```diff
@@ -857,6 +857,13 @@ impl Thread {
             request.messages.push(request_message);
         }
 
+        // Set a cache breakpoint at the second-to-last message.
+        // https://docs.anthropic.com/en/docs/build-with-claude/prompt-caching
+        let breakpoint_index = request.messages.len() - 2;
+        for (index, message) in request.messages.iter_mut().enumerate() {
+            message.cache = index == breakpoint_index;
+        }
+
         if !referenced_context_ids.is_empty() {
             let mut context_message = LanguageModelRequestMessage {
                 role: Role::User,
```
Reference in New Issue
Block a user