2
.github/workflows/unit_evals.yml
vendored
2
.github/workflows/unit_evals.yml
vendored
@@ -63,7 +63,7 @@ jobs:
|
||||
|
||||
- name: Run unit evals
|
||||
shell: bash -euxo pipefail {0}
|
||||
run: cargo nextest run --workspace --no-fail-fast --features eval --no-capture -E 'test(::eval_)'
|
||||
run: cargo nextest run --workspace --no-fail-fast --features unit-eval --no-capture -E 'test(::eval_)'
|
||||
env:
|
||||
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
|
||||
|
||||
|
||||
@@ -11,7 +11,7 @@ path = "src/agent.rs"
|
||||
[features]
|
||||
test-support = ["db/test-support"]
|
||||
eval = []
|
||||
edit-agent-eval = []
|
||||
unit-eval = []
|
||||
e2e = []
|
||||
|
||||
[lints]
|
||||
|
||||
@@ -31,7 +31,7 @@ use std::{
|
||||
use util::path;
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_extract_handle_command_output() {
|
||||
// Test how well agent generates multiple edit hunks.
|
||||
//
|
||||
@@ -108,7 +108,7 @@ fn eval_extract_handle_command_output() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_delete_run_git_blame() {
|
||||
// Model | Pass rate
|
||||
// ----------------------------|----------
|
||||
@@ -171,7 +171,7 @@ fn eval_delete_run_git_blame() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_translate_doc_comments() {
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
@@ -234,7 +234,7 @@ fn eval_translate_doc_comments() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
@@ -360,7 +360,7 @@ fn eval_use_wasi_sdk_in_compile_parser_to_wasm() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_disable_cursor_blinking() {
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
@@ -446,7 +446,7 @@ fn eval_disable_cursor_blinking() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_from_pixels_constructor() {
|
||||
// Results for 2025-06-13
|
||||
//
|
||||
@@ -656,7 +656,7 @@ fn eval_from_pixels_constructor() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_zode() {
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
@@ -763,7 +763,7 @@ fn eval_zode() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_add_overwrite_test() {
|
||||
// Model | Pass rate
|
||||
// ============================================
|
||||
@@ -995,7 +995,7 @@ fn eval_add_overwrite_test() {
|
||||
}
|
||||
|
||||
#[test]
|
||||
#[cfg_attr(not(feature = "edit-agent-eval"), ignore)]
|
||||
#[cfg_attr(not(feature = "unit-eval"), ignore)]
|
||||
fn eval_create_empty_file() {
|
||||
// Check that Edit Agent can create a file without writing its
|
||||
// thoughts into it. This issue is not specific to empty files, but
|
||||
|
||||
Reference in New Issue
Block a user