Feature: Add button to re-run failed jobs in Actions (#36924)

Fixes #35997

---------

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
bircni
2026-03-21 22:27:13 +01:00
committed by GitHub
parent ee009ebec8
commit b22123ef86
12 changed files with 332 additions and 45 deletions

View File

@@ -20,7 +20,27 @@ import (
"xorm.io/builder"
)
// GetAllRerunJobs get all jobs that need to be rerun when job should be rerun
// GetFailedRerunJobs collects the jobs that have to be rerun in order to retry the
// unsuccessful part of a run: every job whose status is failure or cancelled, together
// with whatever GetAllRerunJobs reports for it (the job itself plus its dependents).
// A job is identified by its ID and appears at most once, in first-encounter order.
func GetFailedRerunJobs(allJobs []*actions_model.ActionRunJob) []*actions_model.ActionRunJob {
	var result []*actions_model.ActionRunJob
	seenIDs := make(container.Set[int64])
	for _, candidate := range allJobs {
		// Only failed or cancelled jobs act as starting points.
		switch candidate.Status {
		case actions_model.StatusFailure, actions_model.StatusCancelled:
		default:
			continue
		}
		for _, rerunJob := range GetAllRerunJobs(candidate, allJobs) {
			// Several failed jobs may share a dependent; keep only the first occurrence.
			if seenIDs.Contains(rerunJob.ID) {
				continue
			}
			seenIDs.Add(rerunJob.ID)
			result = append(result, rerunJob)
		}
	}
	return result
}
// GetAllRerunJobs returns the target job and all jobs that transitively depend on it.
// Downstream jobs are included regardless of their current status.
func GetAllRerunJobs(job *actions_model.ActionRunJob, allJobs []*actions_model.ActionRunJob) []*actions_model.ActionRunJob {
rerunJobs := []*actions_model.ActionRunJob{job}
rerunJobsIDSet := make(container.Set[string])
@@ -49,12 +69,12 @@ func GetAllRerunJobs(job *actions_model.ActionRunJob, allJobs []*actions_model.A
return rerunJobs
}
// RerunWorkflowRunJobs reruns all done jobs of a workflow run,
// or reruns a selected job and all of its downstream jobs when targetJob is specified.
func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobs []*actions_model.ActionRunJob, targetJob *actions_model.ActionRunJob) error {
// Rerun is not allowed if the run is not done.
// prepareRunRerun validates the run, resets its state, handles concurrency, persists the
// updated run, and fires a status-update notification.
// It returns isRunBlocked (true when the run itself is held by a concurrency group).
func prepareRunRerun(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobs []*actions_model.ActionRunJob) (isRunBlocked bool, err error) {
if !run.Status.IsDone() {
return util.NewInvalidArgumentErrorf("this workflow run is not done")
return false, util.NewInvalidArgumentErrorf("this workflow run is not done")
}
cfgUnit := repo.MustGetUnit(ctx, unit.TypeActions)
@@ -62,7 +82,7 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
// Rerun is not allowed when workflow is disabled.
cfg := cfgUnit.ActionsConfig()
if cfg.IsWorkflowDisabled(run.WorkflowID) {
return util.NewInvalidArgumentErrorf("workflow %s is disabled", run.WorkflowID)
return false, util.NewInvalidArgumentErrorf("workflow %s is disabled", run.WorkflowID)
}
// Reset run's timestamps and status.
@@ -73,31 +93,31 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
vars, err := actions_model.GetVariablesOfRun(ctx, run)
if err != nil {
return fmt.Errorf("get run %d variables: %w", run.ID, err)
return false, fmt.Errorf("get run %d variables: %w", run.ID, err)
}
if run.RawConcurrency != "" {
var rawConcurrency model.RawConcurrency
if err := yaml.Unmarshal([]byte(run.RawConcurrency), &rawConcurrency); err != nil {
return fmt.Errorf("unmarshal raw concurrency: %w", err)
return false, fmt.Errorf("unmarshal raw concurrency: %w", err)
}
if err := EvaluateRunConcurrencyFillModel(ctx, run, &rawConcurrency, vars, nil); err != nil {
return err
return false, err
}
run.Status, err = PrepareToStartRunWithConcurrency(ctx, run)
if err != nil {
return err
return false, err
}
}
if err := actions_model.UpdateRun(ctx, run, "started", "stopped", "previous_duration", "status", "concurrency_group", "concurrency_cancel"); err != nil {
return err
return false, err
}
if err := run.LoadAttributes(ctx); err != nil {
return err
return false, err
}
for _, job := range jobs {
@@ -106,23 +126,38 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
notify_service.WorkflowRunStatusUpdate(ctx, run.Repo, run.TriggerUser, run)
isRunBlocked := run.Status == actions_model.StatusBlocked
return run.Status == actions_model.StatusBlocked, nil
}
if targetJob == nil {
for _, job := range jobs {
// If the job has needs, it should be blocked to wait for its dependencies.
shouldBlockJob := len(job.Needs) > 0 || isRunBlocked
if err := rerunWorkflowJob(ctx, job, shouldBlockJob); err != nil {
return err
}
}
// RerunWorkflowRunJobs reruns the given jobs of a workflow run.
// jobsToRerun must include all jobs to be rerun (the target job and its transitively dependent jobs).
// A job is blocked (waiting for dependencies) if the run itself is blocked or if any of its
// needs are also being rerun.
func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobsToRerun []*actions_model.ActionRunJob) error {
if len(jobsToRerun) == 0 {
return nil
}
rerunJobs := GetAllRerunJobs(targetJob, jobs)
for _, job := range rerunJobs {
// Jobs other than the selected one should wait for dependencies.
shouldBlockJob := job.JobID != targetJob.JobID || isRunBlocked
isRunBlocked, err := prepareRunRerun(ctx, repo, run, jobsToRerun)
if err != nil {
return err
}
rerunJobIDs := make(container.Set[string])
for _, j := range jobsToRerun {
rerunJobIDs.Add(j.JobID)
}
for _, job := range jobsToRerun {
shouldBlockJob := isRunBlocked
if !shouldBlockJob {
for _, need := range job.Needs {
if rerunJobIDs.Contains(need) {
shouldBlockJob = true
break
}
}
}
if err := rerunWorkflowJob(ctx, job, shouldBlockJob); err != nil {
return err
}

View File

@@ -4,11 +4,14 @@
package actions
import (
"context"
"testing"
actions_model "code.gitea.io/gitea/models/actions"
"code.gitea.io/gitea/modules/util"
"github.com/stretchr/testify/assert"
"github.com/stretchr/testify/require"
)
func TestGetAllRerunJobs(t *testing.T) {
@@ -46,3 +49,97 @@ func TestGetAllRerunJobs(t *testing.T) {
assert.ElementsMatch(t, tc.rerunJobs, rerunJobs)
}
}
// TestGetFailedRerunJobs checks which jobs GetFailedRerunJobs selects for a variety of
// dependency graphs and job statuses.
func TestGetFailedRerunJobs(t *testing.T) {
	// newJob builds a minimal job. Jobs within one case need distinct IDs because
	// GetFailedRerunJobs deduplicates its result by ID.
	newJob := func(id int64, jobID string, status actions_model.Status, needs ...string) *actions_model.ActionRunJob {
		return &actions_model.ActionRunJob{ID: id, JobID: jobID, Status: status, Needs: needs}
	}

	testCases := []struct {
		name string
		jobs []*actions_model.ActionRunJob
		// wantIdx lists the indexes (into jobs) of the jobs expected in the result.
		wantIdx []int
	}{
		{
			name: "no failed jobs returns empty",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusSuccess),
				newJob(2, "job2", actions_model.StatusSkipped, "job1"),
			},
		},
		{
			name: "single failed job with no dependents",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusFailure),
				newJob(2, "job2", actions_model.StatusSuccess),
			},
			wantIdx: []int{0},
		},
		{
			// job1 failed; job2 needs job1 and job3 needs job2 (both skipped);
			// job4 is unrelated and must not be selected.
			name: "failed job pulls in downstream dependents",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusFailure),
				newJob(2, "job2", actions_model.StatusSkipped, "job1"),
				newJob(3, "job3", actions_model.StatusSkipped, "job2"),
				newJob(4, "job4", actions_model.StatusSuccess),
			},
			wantIdx: []int{0, 1, 2},
		},
		{
			// job3 needs the failed job1, job4 needs the failed job2.
			name: "multiple independent failed jobs each pull in their own dependents",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusFailure),
				newJob(2, "job2", actions_model.StatusFailure),
				newJob(3, "job3", actions_model.StatusSkipped, "job1"),
				newJob(4, "job4", actions_model.StatusSkipped, "job2"),
			},
			wantIdx: []int{0, 1, 2, 3},
		},
		{
			// job3 needs both failed jobs and must be reported exactly once.
			name: "shared downstream dependent is not duplicated",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusFailure),
				newJob(2, "job2", actions_model.StatusFailure),
				newJob(3, "job3", actions_model.StatusSkipped, "job1", "job2"),
			},
			wantIdx: []int{0, 1, 2},
		},
		{
			// job2 succeeded but needs the failed job1, so it is rerun as well:
			// dependents are selected regardless of their own status.
			name: "successful downstream job of a failed job is still included",
			jobs: []*actions_model.ActionRunJob{
				newJob(1, "job1", actions_model.StatusFailure),
				newJob(2, "job2", actions_model.StatusSuccess, "job1"),
			},
			wantIdx: []int{0, 1},
		},
	}

	for _, tc := range testCases {
		t.Run(tc.name, func(t *testing.T) {
			got := GetFailedRerunJobs(tc.jobs)
			if len(tc.wantIdx) == 0 {
				assert.Empty(t, got)
				return
			}
			want := make([]*actions_model.ActionRunJob, 0, len(tc.wantIdx))
			for _, idx := range tc.wantIdx {
				want = append(want, tc.jobs[idx])
			}
			assert.ElementsMatch(t, want, got)
			// Each expected job must appear exactly once.
			assert.Len(t, got, len(want))
		})
	}
}
// TestRerunValidation verifies that RerunWorkflowRunJobs refuses to rerun jobs of a run
// that has not finished yet and reports the refusal as an invalid-argument error.
func TestRerunValidation(t *testing.T) {
	unfinishedRun := &actions_model.ActionRun{Status: actions_model.StatusRunning}

	// requireRejected reruns the given jobs against unfinishedRun and expects the call to
	// fail with util.ErrInvalidArgument. The repository is nil: the test relies on the
	// run-status check rejecting the request before the repository is touched.
	requireRejected := func(t *testing.T, jobsToRerun []*actions_model.ActionRunJob) {
		t.Helper()
		err := RerunWorkflowRunJobs(context.Background(), nil, unfinishedRun, jobsToRerun)
		require.Error(t, err)
		assert.ErrorIs(t, err, util.ErrInvalidArgument)
	}

	t.Run("RerunWorkflowRunJobs rejects a non-done run", func(t *testing.T) {
		requireRejected(t, []*actions_model.ActionRunJob{
			{ID: 1, JobID: "job1"},
		})
	})

	t.Run("RerunWorkflowRunJobs rejects a non-done run when failed jobs exist", func(t *testing.T) {
		allJobs := []*actions_model.ActionRunJob{
			{ID: 1, JobID: "job1", Status: actions_model.StatusFailure},
		}
		requireRejected(t, GetFailedRerunJobs(allJobs))
	})
}