Files
zed/.github/workflows/run_agent_evals.yml
Richard Feldman 908ef03502 Split out cron and non-cron unit evals (#42472)
Release Notes:

- N/A

---------

Co-authored-by: Bennet Bo Fenner <bennetbo@gmx.de>
2025-11-11 13:45:48 -05:00

68 lines
2.3 KiB
YAML

# Generated from xtask::workflows::run_agent_evals
# Rebuild with `cargo xtask workflows`.
name: run_agent_evals
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: '0'
RUST_BACKTRACE: '1'
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
ZED_EVAL_TELEMETRY: '1'
MODEL_NAME: ${{ inputs.model_name }}
on:
workflow_dispatch:
inputs:
model_name:
description: model_name
required: true
type: string
jobs:
agent_evals:
runs-on: namespace-profile-16x32-ubuntu-2204
steps:
- name: steps::checkout_repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
clean: false
- name: steps::cache_rust_dependencies_namespace
uses: namespacelabs/nscloud-cache-action@v1
with:
cache: rust
- name: steps::setup_linux
run: ./script/linux
shell: bash -euxo pipefail {0}
- name: steps::install_mold
run: ./script/install-mold
shell: bash -euxo pipefail {0}
- name: steps::download_wasi_sdk
run: ./script/download-wasi-sdk
shell: bash -euxo pipefail {0}
- name: steps::setup_cargo_config
run: |
mkdir -p ./../.cargo
cp ./.cargo/ci-config.toml ./../.cargo/config.toml
shell: bash -euxo pipefail {0}
- name: cargo build --package=eval
run: cargo build --package=eval
shell: bash -euxo pipefail {0}
- name: run_agent_evals::agent_evals::run_eval
run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
shell: bash -euxo pipefail {0}
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
- name: steps::cleanup_cargo_config
if: always()
run: |
rm -rf ./../.cargo
shell: bash -euxo pipefail {0}
timeout-minutes: 600
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true