Allow passing model_name to evals (#42395)

Release Notes:

- N/A
This commit is contained in:
Conrad Irwin
2025-11-10 16:00:52 -07:00
committed by GitHub
parent b607077c08
commit 359521e91d
2 changed files with 21 additions and 35 deletions

View File

@@ -1,26 +1,19 @@
use gh_workflow::{
Event, Expression, Job, PullRequest, PullRequestType, Run, Schedule, Step, Use, Workflow,
WorkflowDispatch,
};
use gh_workflow::{Event, Expression, Job, Run, Schedule, Step, Use, Workflow, WorkflowDispatch};
use crate::tasks::workflows::{
runners::{self, Platform},
steps::{self, FluentBuilder as _, NamedJob, named, setup_cargo_config},
vars,
vars::{self, Input},
};
/// Builds the GitHub Actions workflow that runs the agent evals job.
///
/// NOTE(review): this listing is a rendered diff. Removed and added lines appear
/// next to each other without +/- markers, and the `@@` hunk header further down
/// marks a gap where some lines of this function are not shown.
pub(crate) fn run_agent_evals() -> Workflow {
let agent_evals = agent_evals();
// String workflow input named "model_name". The second argument is `None`
// (presumably "no default value" -- confirm against `Input::string`).
let model_name = Input::string("model_name", None);
named::workflow()
// Presumably the pre-change trigger block (`PullRequest` / `PullRequestType` are only
// imported by the older `use` line): cron `0 0 * * *` (daily at 00:00), pull requests on
// any branch for synchronize/reopened/labeled events, and manual dispatch.
.on(Event::default()
.schedule([Schedule::default().cron("0 0 * * *")])
.pull_request(PullRequest::default().add_branch("**").types([
PullRequestType::Synchronize,
PullRequestType::Reopened,
PullRequestType::Labeled,
]))
.workflow_dispatch(WorkflowDispatch::default()))
// Presumably the post-change trigger block: manual dispatch only, with `model_name`
// registered as a dispatch input.
.on(Event::default().workflow_dispatch(
WorkflowDispatch::default().add_input(model_name.name, model_name.input()),
))
.concurrency(vars::one_workflow_per_non_main_branch())
.add_env(("CARGO_TERM_COLOR", "always"))
.add_env(("CARGO_INCREMENTAL", 0))
@@ -28,29 +21,28 @@ pub(crate) fn run_agent_evals() -> Workflow {
.add_env(("ANTHROPIC_API_KEY", vars::ANTHROPIC_API_KEY))
.add_env(("ZED_CLIENT_CHECKSUM_SEED", vars::ZED_CLIENT_CHECKSUM_SEED))
.add_env(("ZED_EVAL_TELEMETRY", 1))
// Exposes the dispatch input to the job as the `MODEL_NAME` environment variable, which
// the eval command reads. What `model_name.to_string()` renders to is not visible here
// (presumably a `${{ inputs.model_name }}`-style expression -- confirm).
.add_env(("MODEL_NAME", model_name.to_string()))
.add_job(agent_evals.name, agent_evals.job)
}
/// Builds the named job that checks out the repo, builds the `eval` package and runs it
/// on the default Linux runner.
///
/// NOTE(review): this listing is a rendered diff. Where two near-identical lines appear
/// back to back, the first is presumably the removed line and the second its replacement.
fn agent_evals() -> NamedJob {
// Step that runs the eval binary with 8 repetitions and a concurrency of 1.
fn run_eval() -> Step<Run> {
// Presumably the pre-change command (no `--model` flag).
named::bash("cargo run --package=eval -- --repetitions=8 --concurrency=1")
// Variant that forwards the `MODEL_NAME` environment variable through `--model`; the
// expansion is double-quoted so the shell passes it as a single argument.
named::bash(
"cargo run --package=eval -- --repetitions=8 --concurrency=1 --model \"${MODEL_NAME}\"",
)
}
named::job(
Job::default()
// Job condition: only in repositories owned by `zed-industries`, and for pull_request
// events only when the PR carries the `run-eval` label. Presumably a removed line, since
// the pull_request trigger appears to be dropped by this change -- confirm.
.cond(Expression::new(indoc::indoc!{r#"
github.repository_owner == 'zed-industries' &&
(github.event_name != 'pull_request' || contains(github.event.pull_request.labels.*.name, 'run-eval'))
"#}))
.runs_on(runners::LINUX_DEFAULT)
// Two timeout variants: 60 minutes, then 60 * 10 = 600 minutes (presumably old and new).
.timeout_minutes(60_u32)
.timeout_minutes(60_u32 * 10)
.add_step(steps::checkout_repo())
.add_step(steps::cache_rust_dependencies_namespace())
.map(steps::install_linux_dependencies)
.add_step(setup_cargo_config(Platform::Linux))
// Build first so the run step below does not also have to compile the package.
.add_step(steps::script("cargo build --package=eval"))
.add_step(run_eval())
// The same cleanup step appears twice; the two lines differ only by the trailing comma
// (presumably the old and new formatting of one step, not two separate steps).
.add_step(steps::cleanup_cargo_config(Platform::Linux))
.add_step(steps::cleanup_cargo_config(Platform::Linux)),
)
}