Feature: Add button to re-run failed jobs in Actions (#36924)
Fixes #35997

Co-authored-by: Claude Sonnet 4.6 <noreply@anthropic.com>
Co-authored-by: wxiaoguang <wxiaoguang@gmail.com>
This commit is contained in:
@@ -20,7 +20,27 @@ import (
|
||||
"xorm.io/builder"
|
||||
)
|
||||
|
||||
// GetAllRerunJobs get all jobs that need to be rerun when job should be rerun
|
||||
// GetFailedRerunJobs returns all failed jobs and their downstream dependent jobs that need to be rerun
|
||||
func GetFailedRerunJobs(allJobs []*actions_model.ActionRunJob) []*actions_model.ActionRunJob {
|
||||
rerunJobIDSet := make(container.Set[int64])
|
||||
var jobsToRerun []*actions_model.ActionRunJob
|
||||
|
||||
for _, job := range allJobs {
|
||||
if job.Status == actions_model.StatusFailure || job.Status == actions_model.StatusCancelled {
|
||||
for _, j := range GetAllRerunJobs(job, allJobs) {
|
||||
if !rerunJobIDSet.Contains(j.ID) {
|
||||
rerunJobIDSet.Add(j.ID)
|
||||
jobsToRerun = append(jobsToRerun, j)
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
return jobsToRerun
|
||||
}
|
||||
|
||||
// GetAllRerunJobs returns the target job and all jobs that transitively depend on it.
|
||||
// Downstream jobs are included regardless of their current status.
|
||||
func GetAllRerunJobs(job *actions_model.ActionRunJob, allJobs []*actions_model.ActionRunJob) []*actions_model.ActionRunJob {
|
||||
rerunJobs := []*actions_model.ActionRunJob{job}
|
||||
rerunJobsIDSet := make(container.Set[string])
|
||||
@@ -49,12 +69,12 @@ func GetAllRerunJobs(job *actions_model.ActionRunJob, allJobs []*actions_model.A
|
||||
return rerunJobs
|
||||
}
|
||||
|
||||
// RerunWorkflowRunJobs reruns all done jobs of a workflow run,
|
||||
// or reruns a selected job and all of its downstream jobs when targetJob is specified.
|
||||
func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobs []*actions_model.ActionRunJob, targetJob *actions_model.ActionRunJob) error {
|
||||
// Rerun is not allowed if the run is not done.
|
||||
// prepareRunRerun validates the run, resets its state, handles concurrency, persists the
|
||||
// updated run, and fires a status-update notification.
|
||||
// It returns isRunBlocked (true when the run itself is held by a concurrency group).
|
||||
func prepareRunRerun(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobs []*actions_model.ActionRunJob) (isRunBlocked bool, err error) {
|
||||
if !run.Status.IsDone() {
|
||||
return util.NewInvalidArgumentErrorf("this workflow run is not done")
|
||||
return false, util.NewInvalidArgumentErrorf("this workflow run is not done")
|
||||
}
|
||||
|
||||
cfgUnit := repo.MustGetUnit(ctx, unit.TypeActions)
|
||||
@@ -62,7 +82,7 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
|
||||
// Rerun is not allowed when workflow is disabled.
|
||||
cfg := cfgUnit.ActionsConfig()
|
||||
if cfg.IsWorkflowDisabled(run.WorkflowID) {
|
||||
return util.NewInvalidArgumentErrorf("workflow %s is disabled", run.WorkflowID)
|
||||
return false, util.NewInvalidArgumentErrorf("workflow %s is disabled", run.WorkflowID)
|
||||
}
|
||||
|
||||
// Reset run's timestamps and status.
|
||||
@@ -73,31 +93,31 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
|
||||
|
||||
vars, err := actions_model.GetVariablesOfRun(ctx, run)
|
||||
if err != nil {
|
||||
return fmt.Errorf("get run %d variables: %w", run.ID, err)
|
||||
return false, fmt.Errorf("get run %d variables: %w", run.ID, err)
|
||||
}
|
||||
|
||||
if run.RawConcurrency != "" {
|
||||
var rawConcurrency model.RawConcurrency
|
||||
if err := yaml.Unmarshal([]byte(run.RawConcurrency), &rawConcurrency); err != nil {
|
||||
return fmt.Errorf("unmarshal raw concurrency: %w", err)
|
||||
return false, fmt.Errorf("unmarshal raw concurrency: %w", err)
|
||||
}
|
||||
|
||||
if err := EvaluateRunConcurrencyFillModel(ctx, run, &rawConcurrency, vars, nil); err != nil {
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
run.Status, err = PrepareToStartRunWithConcurrency(ctx, run)
|
||||
if err != nil {
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
}
|
||||
|
||||
if err := actions_model.UpdateRun(ctx, run, "started", "stopped", "previous_duration", "status", "concurrency_group", "concurrency_cancel"); err != nil {
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
if err := run.LoadAttributes(ctx); err != nil {
|
||||
return err
|
||||
return false, err
|
||||
}
|
||||
|
||||
for _, job := range jobs {
|
||||
@@ -106,23 +126,38 @@ func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run
|
||||
|
||||
notify_service.WorkflowRunStatusUpdate(ctx, run.Repo, run.TriggerUser, run)
|
||||
|
||||
isRunBlocked := run.Status == actions_model.StatusBlocked
|
||||
return run.Status == actions_model.StatusBlocked, nil
|
||||
}
|
||||
|
||||
if targetJob == nil {
|
||||
for _, job := range jobs {
|
||||
// If the job has needs, it should be blocked to wait for its dependencies.
|
||||
shouldBlockJob := len(job.Needs) > 0 || isRunBlocked
|
||||
if err := rerunWorkflowJob(ctx, job, shouldBlockJob); err != nil {
|
||||
return err
|
||||
}
|
||||
}
|
||||
// RerunWorkflowRunJobs reruns the given jobs of a workflow run.
|
||||
// jobsToRerun must include all jobs to be rerun (the target job and its transitively dependent jobs).
|
||||
// A job is blocked (waiting for dependencies) if the run itself is blocked or if any of its
|
||||
// needs are also being rerun.
|
||||
func RerunWorkflowRunJobs(ctx context.Context, repo *repo_model.Repository, run *actions_model.ActionRun, jobsToRerun []*actions_model.ActionRunJob) error {
|
||||
if len(jobsToRerun) == 0 {
|
||||
return nil
|
||||
}
|
||||
|
||||
rerunJobs := GetAllRerunJobs(targetJob, jobs)
|
||||
for _, job := range rerunJobs {
|
||||
// Jobs other than the selected one should wait for dependencies.
|
||||
shouldBlockJob := job.JobID != targetJob.JobID || isRunBlocked
|
||||
isRunBlocked, err := prepareRunRerun(ctx, repo, run, jobsToRerun)
|
||||
if err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
rerunJobIDs := make(container.Set[string])
|
||||
for _, j := range jobsToRerun {
|
||||
rerunJobIDs.Add(j.JobID)
|
||||
}
|
||||
|
||||
for _, job := range jobsToRerun {
|
||||
shouldBlockJob := isRunBlocked
|
||||
if !shouldBlockJob {
|
||||
for _, need := range job.Needs {
|
||||
if rerunJobIDs.Contains(need) {
|
||||
shouldBlockJob = true
|
||||
break
|
||||
}
|
||||
}
|
||||
}
|
||||
if err := rerunWorkflowJob(ctx, job, shouldBlockJob); err != nil {
|
||||
return err
|
||||
}
|
||||
|
||||
@@ -4,11 +4,14 @@
|
||||
package actions
|
||||
|
||||
import (
|
||||
"context"
|
||||
"testing"
|
||||
|
||||
actions_model "code.gitea.io/gitea/models/actions"
|
||||
"code.gitea.io/gitea/modules/util"
|
||||
|
||||
"github.com/stretchr/testify/assert"
|
||||
"github.com/stretchr/testify/require"
|
||||
)
|
||||
|
||||
func TestGetAllRerunJobs(t *testing.T) {
|
||||
@@ -46,3 +49,97 @@ func TestGetAllRerunJobs(t *testing.T) {
|
||||
assert.ElementsMatch(t, tc.rerunJobs, rerunJobs)
|
||||
}
|
||||
}
|
||||
|
||||
func TestGetFailedRerunJobs(t *testing.T) {
|
||||
// IDs must be non-zero to distinguish jobs in the dedup set.
|
||||
makeJob := func(id int64, jobID string, status actions_model.Status, needs ...string) *actions_model.ActionRunJob {
|
||||
return &actions_model.ActionRunJob{ID: id, JobID: jobID, Status: status, Needs: needs}
|
||||
}
|
||||
|
||||
t.Run("no failed jobs returns empty", func(t *testing.T) {
|
||||
jobs := []*actions_model.ActionRunJob{
|
||||
makeJob(1, "job1", actions_model.StatusSuccess),
|
||||
makeJob(2, "job2", actions_model.StatusSkipped, "job1"),
|
||||
}
|
||||
assert.Empty(t, GetFailedRerunJobs(jobs))
|
||||
})
|
||||
|
||||
t.Run("single failed job with no dependents", func(t *testing.T) {
|
||||
job1 := makeJob(1, "job1", actions_model.StatusFailure)
|
||||
job2 := makeJob(2, "job2", actions_model.StatusSuccess)
|
||||
jobs := []*actions_model.ActionRunJob{job1, job2}
|
||||
|
||||
result := GetFailedRerunJobs(jobs)
|
||||
assert.ElementsMatch(t, []*actions_model.ActionRunJob{job1}, result)
|
||||
})
|
||||
|
||||
t.Run("failed job pulls in downstream dependents", func(t *testing.T) {
|
||||
// job1 failed; job2 depends on job1 (skipped); job3 depends on job2 (skipped)
|
||||
job1 := makeJob(1, "job1", actions_model.StatusFailure)
|
||||
job2 := makeJob(2, "job2", actions_model.StatusSkipped, "job1")
|
||||
job3 := makeJob(3, "job3", actions_model.StatusSkipped, "job2")
|
||||
job4 := makeJob(4, "job4", actions_model.StatusSuccess) // unrelated, must not appear
|
||||
jobs := []*actions_model.ActionRunJob{job1, job2, job3, job4}
|
||||
|
||||
result := GetFailedRerunJobs(jobs)
|
||||
assert.ElementsMatch(t, []*actions_model.ActionRunJob{job1, job2, job3}, result)
|
||||
})
|
||||
|
||||
t.Run("multiple independent failed jobs each pull in their own dependents", func(t *testing.T) {
|
||||
// job1 failed -> job3 depends on job1
|
||||
// job2 failed -> job4 depends on job2
|
||||
job1 := makeJob(1, "job1", actions_model.StatusFailure)
|
||||
job2 := makeJob(2, "job2", actions_model.StatusFailure)
|
||||
job3 := makeJob(3, "job3", actions_model.StatusSkipped, "job1")
|
||||
job4 := makeJob(4, "job4", actions_model.StatusSkipped, "job2")
|
||||
jobs := []*actions_model.ActionRunJob{job1, job2, job3, job4}
|
||||
|
||||
result := GetFailedRerunJobs(jobs)
|
||||
assert.ElementsMatch(t, []*actions_model.ActionRunJob{job1, job2, job3, job4}, result)
|
||||
})
|
||||
|
||||
t.Run("shared downstream dependent is not duplicated", func(t *testing.T) {
|
||||
// job1 and job2 both failed; job3 depends on both
|
||||
job1 := makeJob(1, "job1", actions_model.StatusFailure)
|
||||
job2 := makeJob(2, "job2", actions_model.StatusFailure)
|
||||
job3 := makeJob(3, "job3", actions_model.StatusSkipped, "job1", "job2")
|
||||
jobs := []*actions_model.ActionRunJob{job1, job2, job3}
|
||||
|
||||
result := GetFailedRerunJobs(jobs)
|
||||
assert.ElementsMatch(t, []*actions_model.ActionRunJob{job1, job2, job3}, result)
|
||||
assert.Len(t, result, 3) // job3 must appear exactly once
|
||||
})
|
||||
|
||||
t.Run("successful downstream job of a failed job is still included", func(t *testing.T) {
|
||||
// job1 failed; job2 succeeded but depends on job1 — downstream is always rerun
|
||||
// regardless of its own status (GetAllRerunJobs includes all transitive dependents)
|
||||
job1 := makeJob(1, "job1", actions_model.StatusFailure)
|
||||
job2 := makeJob(2, "job2", actions_model.StatusSuccess, "job1")
|
||||
jobs := []*actions_model.ActionRunJob{job1, job2}
|
||||
|
||||
result := GetFailedRerunJobs(jobs)
|
||||
assert.ElementsMatch(t, []*actions_model.ActionRunJob{job1, job2}, result)
|
||||
})
|
||||
}
|
||||
|
||||
func TestRerunValidation(t *testing.T) {
|
||||
runningRun := &actions_model.ActionRun{Status: actions_model.StatusRunning}
|
||||
|
||||
t.Run("RerunWorkflowRunJobs rejects a non-done run", func(t *testing.T) {
|
||||
jobs := []*actions_model.ActionRunJob{
|
||||
{ID: 1, JobID: "job1"},
|
||||
}
|
||||
err := RerunWorkflowRunJobs(context.Background(), nil, runningRun, jobs)
|
||||
require.Error(t, err)
|
||||
assert.ErrorIs(t, err, util.ErrInvalidArgument)
|
||||
})
|
||||
|
||||
t.Run("RerunWorkflowRunJobs rejects a non-done run when failed jobs exist", func(t *testing.T) {
|
||||
jobs := []*actions_model.ActionRunJob{
|
||||
{ID: 1, JobID: "job1", Status: actions_model.StatusFailure},
|
||||
}
|
||||
err := RerunWorkflowRunJobs(context.Background(), nil, runningRun, GetFailedRerunJobs(jobs))
|
||||
require.Error(t, err)
|
||||
assert.ErrorIs(t, err, util.ErrInvalidArgument)
|
||||
})
|
||||
}
|
||||
|
||||
Reference in New Issue
Block a user