Files
zed/.github/workflows/run_cron_unit_evals.yml
Richard Feldman da3bab18fe Unit eval GPT-5 and Gemini 3 Pro (#43916)
Follow-up to #43907

Release Notes:

- N/A
2025-12-01 11:41:15 -05:00

78 lines
2.7 KiB
YAML

# Generated from xtask::workflows::run_cron_unit_evals
# Rebuild with `cargo xtask workflows`.
name: run_cron_unit_evals
env:
CARGO_TERM_COLOR: always
CARGO_INCREMENTAL: '0'
RUST_BACKTRACE: '1'
ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
on:
schedule:
- cron: 47 1 * * 2
workflow_dispatch: {}
jobs:
cron_unit_evals:
runs-on: namespace-profile-16x32-ubuntu-2204
strategy:
matrix:
model:
- anthropic/claude-sonnet-4-5-latest
- anthropic/claude-opus-4-5-latest
- google/gemini-3-pro
- openai/gpt-5
fail-fast: false
steps:
- name: steps::checkout_repo
uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
with:
clean: false
- name: steps::setup_cargo_config
run: |
mkdir -p ./../.cargo
cp ./.cargo/ci-config.toml ./../.cargo/config.toml
shell: bash -euxo pipefail {0}
- name: steps::cache_rust_dependencies_namespace
uses: namespacelabs/nscloud-cache-action@v1
with:
cache: rust
- name: steps::setup_linux
run: ./script/linux
shell: bash -euxo pipefail {0}
- name: steps::install_mold
run: ./script/install-mold
shell: bash -euxo pipefail {0}
- name: steps::download_wasi_sdk
run: ./script/download-wasi-sdk
shell: bash -euxo pipefail {0}
- name: steps::cargo_install_nextest
uses: taiki-e/install-action@nextest
- name: steps::clear_target_dir_if_large
run: ./script/clear-target-dir-if-larger-than 250
shell: bash -euxo pipefail {0}
- name: ./script/run-unit-evals
run: ./script/run-unit-evals
shell: bash -euxo pipefail {0}
env:
ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
ZED_AGENT_MODEL: ${{ matrix.model }}
- name: steps::cleanup_cargo_config
if: always()
run: |
rm -rf ./../.cargo
shell: bash -euxo pipefail {0}
- name: run_agent_evals::cron_unit_evals::send_failure_to_slack
if: ${{ failure() }}
uses: slackapi/slack-github-action@b0fa283ad8fea605de13dc3f449259339835fc52
with:
method: chat.postMessage
token: ${{ secrets.SLACK_APP_ZED_UNIT_EVALS_BOT_TOKEN }}
payload: |
channel: C04UDRNNJFQ
text: "Unit Evals Failed: https://github.com/zed-industries/zed/actions/runs/${{ github.run_id }}"
concurrency:
group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
cancel-in-progress: true