68 lines
2.3 KiB
YAML
68 lines
2.3 KiB
YAML
# Generated from xtask::workflows::run_agent_evals
|
|
# Rebuild with `cargo xtask workflows`.
|
|
name: run_agent_evals
# Workflow-level environment: these variables are set for every job and step in this file.
env:
  # Cargo/Rust behaviour: always colour output, no incremental compilation, backtraces on.
  CARGO_TERM_COLOR: always
  CARGO_INCREMENTAL: '0'
  RUST_BACKTRACE: '1'
  # Values read from the `secrets` context; nothing sensitive is stored in this file.
  ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
  OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
  GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
  GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
  ZED_CLIENT_CHECKSUM_SEED: ${{ secrets.ZED_CLIENT_CHECKSUM_SEED }}
  # NOTE(review): presumably switches on telemetry in the eval binary - confirm in the eval crate.
  ZED_EVAL_TELEMETRY: '1'
  # The dispatch input is exposed as an environment variable so that shell steps can read
  # it as "${MODEL_NAME}" instead of having the raw value expanded into the script text.
  MODEL_NAME: ${{ inputs.model_name }}
# Manual trigger only: the workflow runs when dispatched by hand, and the caller must
# supply `model_name` as a string.
on:
  workflow_dispatch:
    inputs:
      model_name:
        description: model_name
        required: true
        type: string
jobs:
  # Single job: prepare the runner, build the `eval` package, then run it with the
  # model passed in through MODEL_NAME.
  agent_evals:
    runs-on: namespace-profile-16x32-ubuntu-2204
    steps:
      # Pinned to a full commit SHA. `clean: false` skips checkout's clean/reset of the
      # existing workspace before fetching.
      - name: steps::checkout_repo
        uses: actions/checkout@11bd71901bbe5b1630ceea73d27597364c9af683
        with:
          clean: false
      # NOTE(review): referenced by the mutable tag `v1`, whereas the checkout action
      # above is pinned to a commit SHA.
      - name: steps::cache_rust_dependencies_namespace
        uses: namespacelabs/nscloud-cache-action@v1
        with:
          cache: rust
      # Repository-provided setup scripts. Every shell step runs under bash with
      # -e, -u, -x and pipefail, so any failing command fails the step.
      - name: steps::setup_linux
        run: ./script/linux
        shell: bash -euxo pipefail {0}
      - name: steps::install_mold
        run: ./script/install-mold
        shell: bash -euxo pipefail {0}
      - name: steps::download_wasi_sdk
        run: ./script/download-wasi-sdk
        shell: bash -euxo pipefail {0}
      # Copies the CI cargo config into a `.cargo` directory one level ABOVE the
      # checkout; the cleanup step at the end of the job removes that directory again.
      - name: steps::setup_cargo_config
        run: |
          mkdir -p ./../.cargo
          cp ./.cargo/ci-config.toml ./../.cargo/config.toml
        shell: bash -euxo pipefail {0}
      # Builds the eval package as its own step before it is run.
      - name: cargo build --package=eval
        run: cargo build --package=eval
        shell: bash -euxo pipefail {0}
      # Runs the eval binary with 8 repetitions and concurrency 1. The model comes from
      # the MODEL_NAME environment variable and is quoted so it stays a single argument.
      - name: run_agent_evals::agent_evals::run_eval
        run: cargo run --package=eval -- --repetitions=8 --concurrency=1 --model "${MODEL_NAME}"
        shell: bash -euxo pipefail {0}
        # NOTE(review): these four keys repeat entries of the workflow-level `env`
        # with identical values.
        env:
          ANTHROPIC_API_KEY: ${{ secrets.ANTHROPIC_API_KEY }}
          OPENAI_API_KEY: ${{ secrets.OPENAI_API_KEY }}
          GOOGLE_AI_API_KEY: ${{ secrets.GOOGLE_AI_API_KEY }}
          GOOGLE_CLOUD_PROJECT: ${{ secrets.GOOGLE_CLOUD_PROJECT }}
      # `if: always()` makes this run even when an earlier step failed, so the
      # out-of-tree `.cargo` directory is not left behind on the runner.
      - name: steps::cleanup_cargo_config
        if: always()
        run: |
          rm -rf ./../.cargo
        shell: bash -euxo pipefail {0}
    # Job-level limit: 600 minutes (10 hours).
    timeout-minutes: 600
# Concurrency group = workflow name + ref name + a third component that is the commit
# SHA on `main` and the constant 'anysha' on every other ref. Runs on `main` therefore
# only share a group when they are for the same commit, while runs on any other ref
# share one group per ref. A new run cancels an in-progress run in the same group.
concurrency:
  group: ${{ github.workflow }}-${{ github.ref_name }}-${{ github.ref_name == 'main' && github.sha || 'anysha' }}
  cancel-in-progress: true